MdirReader is the home of Maildir-related code, and -watch is updated to use some of it, as is the InboxWritable->import_maildir (which I'm not sure should live). "lei q --alert=" is slightly changed for CLI-friendliness tr/-/:/ And damn, I just misread "Cwd" as "Covid"; /me goes back to hiding under the bed :< Eric Wong (11): t/thread-index-gap.t: avoid unnecessary map test_common: disable fsync on the command-line where possible t/cgi.t: modernizations and style updates git: ->qx: respect caller's $/ in array context lei: split out MdirReader package, lazy-require earlier t/run.perl: fix for >128 tests use MdirReader in -watch and InboxWritable lei q: prefix --alert ops with ':' instead of '-' t/run.perl: drop Cwd dependency lei: replace "I:"-prefixed info messages with "#" tests: (lei) fixes for TEST_RUN_MODE=0 and oneshot mode MANIFEST | 2 + lib/PublicInbox/Git.pm | 1 - lib/PublicInbox/IPC.pm | 2 + lib/PublicInbox/Import.pm | 6 +-- lib/PublicInbox/InboxWritable.pm | 55 +++++++++------------ lib/PublicInbox/LEI.pm | 21 ++++---- lib/PublicInbox/LeiImport.pm | 25 ++++++---- lib/PublicInbox/LeiOverview.pm | 2 +- lib/PublicInbox/LeiToMail.pm | 26 ++++------ lib/PublicInbox/MdirReader.pm | 39 +++++++++++++++ lib/PublicInbox/TestCommon.pm | 10 +++- lib/PublicInbox/Watch.pm | 6 ++- t/cgi.t | 84 +++++++++++++------------------- t/lei-import.t | 5 +- t/lei-mirror.t | 2 +- t/lei.t | 2 +- t/lei_to_mail.t | 19 ++++++-- t/mdir_reader.t | 22 +++++++++ t/run.perl | 22 ++++----- t/thread-index-gap.t | 10 ++-- 20 files changed, 211 insertions(+), 150 deletions(-) create mode 100644 lib/PublicInbox/MdirReader.pm create mode 100644 t/mdir_reader.t
We only care abount the number of results. --- t/thread-index-gap.t | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/t/thread-index-gap.t b/t/thread-index-gap.t index 83c3707d..125c5cbd 100644 --- a/t/thread-index-gap.t +++ b/t/thread-index-gap.t @@ -47,14 +47,12 @@ for my $msgs (['orig', reverse @msgs], ['shuffle', shuffle(@msgs)]) { my $over = $ibx->over; my $dbh = $over->dbh; my $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over'); - my @tid = map { $_->[0] } @$tid; - is(scalar(@tid), 1, "only one thread initially ($desc)"); + is(scalar(@$tid), 1, "only one thread initially ($desc)"); $over->dbh_close; - run_script([qw(-index --reindex --rethread), $ibx->{inboxdir}]) or - BAIL_OUT 'rethread'; + run_script([qw(-index --no-fsync --reindex --rethread), + $ibx->{inboxdir}]) or BAIL_OUT 'rethread'; $tid = $dbh->selectall_arrayref('SELECT DISTINCT(tid) FROM over'); - @tid = map { $_->[0] } @$tid; - is(scalar(@tid), 1, "only one thread after rethread ($desc)"); + is(scalar(@$tid), 1, "only one thread after rethread ($desc)"); } done_testing;
This makes tests faster for users on slow TMPDIR (or not using eatmydata) and forces coverage on a non-default switch. Unfortunately, this doesn't yet cover InboxWritable usage. --- lib/PublicInbox/TestCommon.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 2f4ca622..ec9191b6 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -269,6 +269,9 @@ sub run_script ($;$$) { die "unable to deal with $ref $redir"; } } + if ($key =~ /-(index|convert|extindex|convert|xcpdb)\z/) { + unshift @argv, '--no-fsync'; + } if ($run_mode == 0) { # spawn an independent new process, like real-world use cases: require PublicInbox::Spawn;
We prefer BAIL_OUT or fail to die in tests (I didn't know BAIL_OUT existed when I started the project). We can also depend on IO::Uncompress::Gunzip being available, We'll keep the cgi_run wrapper since the .cgi could use some coverage and remove the FIXME note. run_script makes tests fast enough. --- t/cgi.t | 84 ++++++++++++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 49 deletions(-) diff --git a/t/cgi.t b/t/cgi.t index 3818b991..567c2ee0 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -1,42 +1,36 @@ +#!perl -w # Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# FIXME: this test is too slow and most non-CGI-requirements -# should be moved over to things which use test_psgi use strict; -use warnings; -use Test::More; -use PublicInbox::Eml; +use v5.10.1; use PublicInbox::TestCommon; -use PublicInbox::Import; +use IO::Uncompress::Gunzip qw(gunzip); require_mods(qw(Plack::Handler::CGI Plack::Util)); +require PublicInbox::Eml; +require PublicInbox::Import; +require PublicInbox::Inbox; +require PublicInbox::InboxWritable; +require PublicInbox::Config; my ($tmpdir, $for_destroy) = tmpdir(); my $home = "$tmpdir/pi-home"; my $pi_home = "$home/.public-inbox"; my $pi_config = "$pi_home/config"; my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; - +PublicInbox::Import::init_bare($maindir); { - is(1, mkdir($home, 0755), "setup ~/ for testing"); - is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); - PublicInbox::Import::init_bare($maindir); - - open my $fh, '>', "$maindir/description" or die "open: $!\n"; - print $fh "test for public-inbox\n"; - close $fh or die "close: $!\n"; - open $fh, '>>', $pi_config or die; - print $fh <<EOF or die; + mkdir($home, 0755) or BAIL_OUT $!; + mkdir($pi_home, 0755) or BAIL_OUT $!; + open my $fh, '>>', $pi_config or BAIL_OUT $!; + print $fh <<EOF or BAIL_OUT $!; [publicinbox "test"] address = $addr inboxdir = $maindir indexlevel = basic EOF - close $fh or die "close: $!\n"; + close $fh or BAIL_OUT $!; } -use_ok 'PublicInbox::Inbox'; -use_ok 'PublicInbox::InboxWritable'; -use_ok 'PublicInbox::Config'; my $cfg = PublicInbox::Config->new($pi_config); my $ibx = $cfg->lookup_name('test'); my $im = PublicInbox::InboxWritable->new($ibx)->importer(0); @@ -58,7 +52,7 @@ EOF ok($im->add($mime), 'added initial message'); $mime->header_set('Message-ID', '<toobig@example.com>'); - $mime->body_str_set("z\n" x 1024); + $mime->body_set("z\n" x 1024); ok($im->add($mime), 'added big message'); # deliver a reply, too @@ -98,7 +92,7 @@ EOF } # retrieve thread as an mbox -{ +SKIP: { local $ENV{HOME} = $home; my $path = "/test/blahblah\@example.com/t.mbox.gz"; my $res = cgi_run($path); @@ -109,13 +103,10 @@ EOF if ($indexed) { $res = cgi_run($path); like($res->{head}, qr/^Status: 200 /, "search returned mbox"); - eval { - require IO::Uncompress::Gunzip; - my $in = $res->{body}; - my $out; - IO::Uncompress::Gunzip::gunzip(\$in => \$out); - like($out, qr/^From /m, "From lines in mbox"); - }; + my $in = $res->{body}; + my $out; + gunzip(\$in => \$out); + like($out, qr/^From /m, "From lines in mbox"); $res = cgi_run('/test/toobig@example.com/'); like($res->{head}, qr/^Status: 300 /, 'did not index or return >max-size message'); @@ -123,29 +114,24 @@ EOF 'warned about skipping large OID'); } else { like($res->{head}, qr/^Status: 501 /, "search not available"); - SKIP: { skip 'DBD::SQLite not available', 4 }; - } - - my $have_xml_treepp = eval { require XML::TreePP; 1 } if $indexed; - if ($have_xml_treepp) { - $path = "/test/blahblah\@example.com/t.atom"; - $res = cgi_run($path); - like($res->{head}, qr/^Status: 200 /, "atom returned 200"); - like($res->{head}, qr!^Content-Type: application/atom\+xml!m, - "search returned atom"); - my $t = XML::TreePP->new->parse($res->{body}); - is(scalar @{$t->{feed}->{entry}}, 3, "parsed three entries"); - like($t->{feed}->{-xmlns}, qr/\bAtom\b/, - 'looks like an an Atom feed'); - } else { - SKIP: { skip 'DBD::SQLite or XML::TreePP missing', 2 }; + skip('DBD::SQLite not available', 7); # (4 - 1) above, 4 below } + require_mods('XML::TreePP', 4); + $path = "/test/blahblah\@example.com/t.atom"; + $res = cgi_run($path); + like($res->{head}, qr/^Status: 200 /, "atom returned 200"); + like($res->{head}, qr!^Content-Type: application/atom\+xml!m, + "search returned atom"); + my $t = XML::TreePP->new->parse($res->{body}); + is(scalar @{$t->{feed}->{entry}}, 3, "parsed three entries"); + like($t->{feed}->{-xmlns}, qr/\bAtom\b/, + 'looks like an an Atom feed'); } done_testing(); sub cgi_run { - my %env = ( + my $env = { PATH_INFO => $_[0], QUERY_STRING => $_[1] || "", SCRIPT_NAME => '', @@ -154,11 +140,11 @@ sub cgi_run { GATEWAY_INTERFACE => 'CGI/1.1', HTTP_ACCEPT => '*/*', HTTP_HOST => 'test.example.com', - ); + }; my ($in, $out, $err) = ("", "", ""); my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err }; - run_script(['.cgi'], \%env, $rdr); - die "unexpected error: \$?=$? ($err)" if $?; + run_script(['.cgi'], $env, $rdr); + fail "unexpected error: \$?=$? ($err)" if $?; my ($head, $body) = split(/\r\n\r\n/, $out, 2); { head => $head, body => $body, err => $err } }
This could lead to bad results when doing ls-tree -z for v2 import in case there's multiple files. In any case, the `local $/ = "\0"' in Import.pm is also eliminated to reduce potential confusion and surprises. --- lib/PublicInbox/Git.pm | 1 - lib/PublicInbox/Import.pm | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index ac7ff267..e176921c 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -364,7 +364,6 @@ sub popen { sub qx { my $fh = popen(@_); if (wantarray) { - local $/ = "\n"; my @ret = <$fh>; close $fh; # caller should check $? @ret; diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index a070aa1e..e803ee74 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -68,11 +68,9 @@ sub gfi_start { chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref)); die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?; if ($self->{path_type} ne '2/38' && $self->{tip}) { - local $/ = "\0"; - my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref); + my $t = $git->qx(qw(ls-tree -r -z --name-only), $ref); die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; - chomp @t; - $self->{-tree} = { map { $_ => 1 } @t }; + $self->{-tree} = { map { $_ => 1 } split(/\0/, $t) }; } $in_r = $self->{in} = $git->popen(qw(fast-import --quiet --done --date-format=raw),
We'll do more requires in the top-level lei-daemon process to save work in workers. We can also work towards aborting on user errors in lei-daemon rather than worker processes. "lei import -f mbox*" is finally tested inside t/lei_to_mail.t --- MANIFEST | 1 + lib/PublicInbox/LeiImport.pm | 25 +++++++++++++++---------- lib/PublicInbox/LeiToMail.pm | 26 ++++++++++---------------- lib/PublicInbox/MdirReader.pm | 21 +++++++++++++++++++++ lib/PublicInbox/TestCommon.pm | 4 +++- t/lei-import.t | 5 ++++- t/lei_to_mail.t | 19 ++++++++++++++++--- 7 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 lib/PublicInbox/MdirReader.pm diff --git a/MANIFEST b/MANIFEST index 7f417743..6b3fc812 100644 --- a/MANIFEST +++ b/MANIFEST @@ -199,6 +199,7 @@ lib/PublicInbox/ManifestJsGz.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MboxGz.pm lib/PublicInbox/MboxReader.pm +lib/PublicInbox/MdirReader.pm lib/PublicInbox/MiscIdx.pm lib/PublicInbox/MiscSearch.pm lib/PublicInbox/MsgIter.pm diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index a63bfdfd..8358d9d4 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -6,7 +6,6 @@ package PublicInbox::LeiImport; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); -use PublicInbox::MboxReader; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::PktOp; @@ -37,8 +36,17 @@ sub call { # the main "lei import" method $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; - if (my @f = grep { -f } @argv && !$fmt) { - return $lei->fail("--format unset for regular files:\n@f"); + my @f; + for my $x (@argv) { + if (-f $x) { push @f, $x } + elsif (-d _) { require PublicInbox::MdirReader } + } + (@f && !$fmt) and + return $lei->fail("--format unset for regular file(s):\n@f"); + if (@f && $fmt ne 'eml') { + require PublicInbox::MboxReader; + PublicInbox::MboxReader->can($fmt) or + return $lei->fail( "--format=$fmt unrecognized\n"); } $self->{0} = $lei->{0} if $lei->{opt}->{stdin}; my $ops = { @@ -83,11 +91,9 @@ error reading $x: $! my $eml = PublicInbox::Eml->new(\$buf); _import_eml($eml, $lei->{sto}, $set_kw); - } else { # some mbox - my $cb = PublicInbox::MboxReader->can($fmt); - $cb or return $lei->child_error(1 >> 8, <<""); ---format $fmt unsupported for $x - + } else { # some mbox (->can already checked in call); + my $cb = PublicInbox::MboxReader->can($fmt) // + die "BUG: bad fmt=$fmt"; $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw); } }; @@ -109,8 +115,7 @@ unable to open $x: $! _import_fh($lei, $fh, $x); } elsif (-d _ && (-d "$x/cur" || -d "$x/new")) { - require PublicInbox::LeiToMail; - PublicInbox::LeiToMail::maildir_each_file($x, + PublicInbox::MdirReader::maildir_each_file($x, \&_import_maildir, $lei->{sto}, $lei->{opt}->{kw}); } else { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index a5a196db..e3e512be 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -18,6 +18,7 @@ use Symbol qw(gensym); use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use Errno qw(EEXIST ESPIPE ENOENT EPIPE); +my ($maildir_each_file); # struggles with short-lived repos, Gcf2Client makes little sense with lei; # but we may use in-process libgit2 in the future. @@ -266,18 +267,6 @@ sub _mbox_write_cb ($$) { } } -sub maildir_each_file ($$;@) { - my ($dir, $cb, @arg) = @_; - $dir .= '/' unless substr($dir, -1) eq '/'; - for my $d (qw(new/ cur/)) { - my $pfx = $dir.$d; - opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; - } - } -} - sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; @@ -354,11 +343,18 @@ sub new { my $dst = $lei->{ovv}->{dst}; my $self = bless {}, $cls; if ($fmt eq 'maildir') { + $maildir_each_file //= do { + require PublicInbox::MdirReader; + PublicInbox::MdirReader->can('maildir_each_file'); + }; + $lei->{opt}->{augment} and + require PublicInbox::InboxWritable; # eml_from_path $self->{base_type} = 'maildir'; -e $dst && !-d _ and die "$dst exists and is not a directory\n"; $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; } elsif (substr($fmt, 0, 4) eq 'mbox') { + require PublicInbox::MboxReader if $lei->{opt}->{augment}; (-d $dst || (-e _ && !-w _)) and die "$dst exists and is not a writable file\n"; $self->can("eml2$fmt") or die "bad mbox --format=$fmt\n"; @@ -389,12 +385,11 @@ sub _do_augment_maildir { if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { - require PublicInbox::InboxWritable; # eml_from_path - maildir_each_file($dst, \&_augment_file, $lei); + $maildir_each_file->($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - maildir_each_file($dst, \&_unlink); + $maildir_each_file->($dst, \&_unlink); } } @@ -435,7 +430,6 @@ sub _do_augment_mbox { my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) : dup_src($out); my $fmt = $lei->{ovv}->{fmt}; - require PublicInbox::MboxReader; PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei); } # maybe some systems don't honor O_APPEND, Perl does this: diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm new file mode 100644 index 00000000..c6a0e7a8 --- /dev/null +++ b/lib/PublicInbox/MdirReader.pm @@ -0,0 +1,21 @@ +# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# Maildirs for now, MH eventually +package PublicInbox::MdirReader; +use strict; +use v5.10.1; + +sub maildir_each_file ($$;@) { + my ($dir, $cb, @arg) = @_; + $dir .= '/' unless substr($dir, -1) eq '/'; + for my $d (qw(new/ cur/)) { + my $pfx = $dir.$d; + opendir my $dh, $pfx or next; + while (defined(my $fn = readdir($dh))) { + $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + } + } +} + +1; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index ec9191b6..53f13437 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -14,7 +14,7 @@ BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick have_xapian_compact json_utf8 setup_public_inboxes - tcp_host_port test_lei $lei $lei_out $lei_err $lei_opt); + tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt); require Test::More; my @methods = grep(!/\W/, @Test::More::EXPORT); eval(join('', map { "*$_=\\&Test::More::$_;" } @methods)); @@ -457,6 +457,8 @@ our $lei = sub { $res; }; +sub lei (@) { $lei->(@_) } + sub json_utf8 () { state $x = ref(PublicInbox::Config->json)->new->utf8->canonical; } diff --git a/t/lei-import.t b/t/lei-import.t index 709d89fa..b691798a 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -3,12 +3,14 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use PublicInbox::TestCommon; test_lei(sub { +ok(!$lei->(qw(import -f bogus), 't/plack-qp.eml'), 'fails with bogus format'); +like($lei_err, qr/\bbogus unrecognized/, 'gave error message'); ok($lei->(qw(q s:boolean)), 'search miss before import'); unlike($lei_out, qr/boolean/i, 'no results, yet'); open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!; ok($lei->([qw(import -f eml -)], undef, { %$lei_opt, 0 => $fh }), - 'import single file from stdin'); + 'import single file from stdin') or diag $lei_err; close $fh; ok($lei->(qw(q s:boolean)), 'search hit after import'); ok($lei->(qw(import -f eml), 't/data/message_embed.eml'), @@ -35,5 +37,6 @@ $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only one result'); is_deeply($res->[0]->{kw}, [], 'no keywords set'); +# see t/lei_to_mail.t for "import -f mbox*" }); done_testing; diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t index a25795ca..77e9902e 100644 --- a/t/lei_to_mail.t +++ b/t/lei_to_mail.t @@ -10,6 +10,7 @@ use Fcntl qw(SEEK_SET); use PublicInbox::Spawn qw(popen_rd which); use List::Util qw(shuffle); require_mods(qw(DBD::SQLite)); +require PublicInbox::MdirReader; require PublicInbox::MboxReader; require PublicInbox::LeiOverview; require PublicInbox::LEI; @@ -127,6 +128,17 @@ my $orig = do { is(do { local $/; <$fh> }, $raw, 'jobs > 1'); $raw; }; + +test_lei(sub { + ok(lei(qw(import -f), $mbox, $fn), 'imported mbox'); + ok(lei(qw(q s:x)), 'lei q works') or diag $lei_err; + my $res = json_utf8->decode($lei_out); + my $x = $res->[0]; + is($x->{'s'}, 'x', 'subject imported') or diag $lei_out; + is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out; + is($res->[1], undef, 'only one result'); +}); + for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma? my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd'); SKIP: { @@ -230,6 +242,7 @@ SKIP: { # FIFO support } { # Maildir support + my $each_file = PublicInbox::MdirReader->can('maildir_each_file'); my $md = "$tmpdir/maildir/"; my $wcb = $wcb_get->('maildir', $md); is(ref($wcb), 'CODE', 'got Maildir callback'); @@ -237,7 +250,7 @@ SKIP: { # FIFO support $wcb->(\(my $x = $buf), $b4dc0ffee); my @f; - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); + $each_file->($md, sub { push @f, shift }); open my $fh, $f[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf, 'wrote to Maildir'); @@ -246,7 +259,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\nx\n"), $deadcafe); my @x = (); - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift }); + $each_file->($md, sub { push @x, shift }); is(scalar(@x), 1, 'wrote one new file'); ok(!-f $f[0], 'old file clobbered'); open $fh, $x[0] or BAIL_OUT $!; @@ -257,7 +270,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\ny\n"), $deadcafe); $wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe @f = (); - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); + $each_file->($md, sub { push @f, shift }); is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there'); my @new = grep(!/\A\Q$x[0]\E\z/, @f); is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');
We need to explicitly close the write-end of the pipe in workers to ensure they don't prevent each other from seeing EOF. Also, make a note to keep using the pipe for now since Linux <3.14 had broken read(2) semantics when file descriptions are shared across threads/processes. --- t/run.perl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/t/run.perl b/t/run.perl index 96db3045..d0b29e68 100755 --- a/t/run.perl +++ b/t/run.perl @@ -127,9 +127,10 @@ my $producer = $$; my $eof; # we stop respawning if true my $start_worker = sub { - my ($i, $j, $rd, $todo) = @_; + my ($j, $rd, $wr, $todo) = @_; my $pid = fork // DIE "fork: $!"; if ($pid == 0) { + close $wr if $wr; $worker = $$; while (1) { my $r = sysread($rd, my $buf, UINT_SIZE); @@ -155,15 +156,16 @@ my $start_worker = sub { for (my $i = $repeat; $i != 0; $i--) { my @todo = $shuffle ? List::Util::shuffle(@tests) : @tests; - # single-producer, multi-consumer queue relying on POSIX semantics + # single-producer, multi-consumer queue relying on POSIX pipe semantics + # POSIX.1-2008 stipulates a regular file should work, but Linux <3.14 + # had broken read(2) semantics according to the read(2) manpage pipe(my ($rd, $wr)) or DIE "pipe: $!"; # fill the queue before forking so children can start earlier my $n = (_POSIX_PIPE_BUF / UINT_SIZE); if ($n >= $#todo) { print $wr join('', map { pack('I', $_) } (0..$#todo)) or DIE; - close $wr or die; - $wr = undef; + undef $wr; } else { # write what we can... $wr->autoflush(1); print $wr join('', map { pack('I', $_) } (0..$n)) or DIE; @@ -186,22 +188,21 @@ for (my $i = $repeat; $i != 0; $i--) { # skip_all can exit(0), respawn if needed: if (!$eof) { print $OLDERR "# respawning job[$j]\n"; - $start_worker->($i, $j, $rd, \@todo); + $start_worker->($j, $rd, $wr, \@todo); } } }; # start the workers to consume the queue for (my $j = 0; $j < $jobs; $j++) { - $start_worker->($i, $j, $rd, \@todo); + $start_worker->($j, $rd, $wr, \@todo); } - if ($wr) { local $SIG{CHLD} = $sigchld; # too many tests to fit in the pipe before starting workers, # send the rest now the workers are running print $wr join('', map { pack('I', $_) } ($n..$#todo)) or DIE; - close $wr or die; + undef $wr; } $sigchld->(0) while scalar(keys(%pids));
MdirReader now handles files in "$MAILDIR/new" properly and is stricter about what it accepts. eml_from_path is also made robust against FIFOs while eliminating TOCTOU races with between stat(2) and open(2) calls. --- MANIFEST | 1 + lib/PublicInbox/InboxWritable.pm | 55 +++++++++++++------------------- lib/PublicInbox/MdirReader.pm | 22 +++++++++++-- lib/PublicInbox/Watch.pm | 6 ++-- t/mdir_reader.t | 22 +++++++++++++ 5 files changed, 69 insertions(+), 37 deletions(-) create mode 100644 t/mdir_reader.t diff --git a/MANIFEST b/MANIFEST index 6b3fc812..f8ee6998 100644 --- a/MANIFEST +++ b/MANIFEST @@ -376,6 +376,7 @@ t/mbox_reader.t t/mda-mime.eml t/mda.t t/mda_filter_rubylang.t +t/mdir_reader.t t/mid.t t/mime.t t/miscsearch.t diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index 3a4012cd..c3acc4f9 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -10,6 +10,7 @@ use PublicInbox::Import; use PublicInbox::Filter::Base qw(REJECT); use Errno qw(ENOENT); our @EXPORT_OK = qw(eml_from_path); +use Fcntl qw(O_RDONLY O_NONBLOCK); use constant { PERM_UMASK => 0, @@ -118,25 +119,10 @@ sub filter { undef; } -sub is_maildir_basename ($) { - my ($bn) = @_; - return 0 if $bn !~ /\A[a-zA-Z0-9][\-\w:,=\.]+\z/; - if ($bn =~ /:2,([A-Z]+)\z/i) { - my $flags = $1; - return 0 if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail - } - 1; -} - -sub is_maildir_path ($) { - my ($path) = @_; - my @p = split(m!/+!, $path); - (is_maildir_basename($p[-1]) && -f $path) ? 1 : 0; -} - sub eml_from_path ($) { my ($path) = @_; - if (open my $fh, '<', $path) { + if (sysopen(my $fh, $path, O_RDONLY|O_NONBLOCK)) { + return unless -f $fh; # no FIFOs or directories my $str = do { local $/; <$fh> } or return; PublicInbox::Eml->new(\$str); } else { # ENOENT is common with Maildir @@ -145,27 +131,30 @@ sub eml_from_path ($) { } } +sub _each_maildir_fn { + my ($fn, $im, $self) = @_; + if ($fn =~ /:2,([A-Za-z]*)\z/) { + my $fl = $1; + return if $fl =~ /[DT]/; # no Drafts or Trash for public + } + my $eml = eml_from_path($fn) or return; + if ($self && (my $filter = $self->filter($im))) { + my $ret = $filter->scrub($eml) or return; + return if $ret == REJECT(); + $eml = $ret; + } + $im->add($eml); +} + sub import_maildir { my ($self, $dir) = @_; - my $im = $self->importer(1); - foreach my $sub (qw(cur new tmp)) { -d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n"; } - foreach my $sub (qw(cur new)) { - opendir my $dh, "$dir/$sub" or die "opendir $dir/$sub: $!\n"; - while (defined(my $fn = readdir($dh))) { - next unless is_maildir_basename($fn); - my $mime = eml_from_path("$dir/$fn") or next; - - if (my $filter = $self->filter($im)) { - my $ret = $filter->scrub($mime) or return; - return if $ret == REJECT(); - $mime = $ret; - } - $im->add($mime); - } - } + my $im = $self->importer(1); + my @self = $self->filter($im) ? ($self) : (); + PublicInbox::MdirReader::maildir_each_file(\&_each_maildir_fn, + $im, @self); $im->done; } diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index c6a0e7a8..e0ff676d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -2,18 +2,36 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Maildirs for now, MH eventually +# ref: https://cr.yp.to/proto/maildir.html +# https://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::MdirReader; use strict; use v5.10.1; +# returns Maildir flags from a basename ('' for no flags, undef for invalid) +sub maildir_basename_flags { + my (@f) = split(/:/, $_[0], -1); + return if (scalar(@f) > 2 || substr($f[0], 0, 1) eq '.'); + $f[1] // return ''; # "new" + $f[1] =~ /\A2,([A-Za-z]*)\z/ ? $1 : undef; # "cur" +} + +# same as above, but for full path name +sub maildir_path_flags { + my ($f) = @_; + my $i = rindex($f, '/'); + $i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef; +} + sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + while (defined(my $bn = readdir($dh))) { + maildir_basename_flags($bn) // next; + $cb->($pfx.$bn, @arg); } } } diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 1835fa0e..a4302162 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -2,12 +2,13 @@ # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # ref: https://cr.yp.to/proto/maildir.html -# http://wiki2.dovecot.org/MailboxFormat/Maildir +# httsp://wiki2.dovecot.org/MailboxFormat/Maildir package PublicInbox::Watch; use strict; use v5.10.1; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::MdirReader; use PublicInbox::Filter::Base qw(REJECT); use PublicInbox::Spamcheck; use PublicInbox::Sigfd; @@ -207,7 +208,8 @@ sub import_eml ($$$) { sub _try_path { my ($self, $path) = @_; - return unless PublicInbox::InboxWritable::is_maildir_path($path); + my $fl = PublicInbox::MdirReader::maildir_path_flags($path) // return; + return if $fl =~ /[DT]/; # no Drafts or Trash if ($path !~ $self->{mdre}) { warn "unrecognized path: $path\n"; return; diff --git a/t/mdir_reader.t b/t/mdir_reader.t new file mode 100644 index 00000000..51b38af4 --- /dev/null +++ b/t/mdir_reader.t @@ -0,0 +1,22 @@ +#!perl -w +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use PublicInbox::TestCommon; +require_ok 'PublicInbox::MdirReader'; +*maildir_basename_flags = \&PublicInbox::MdirReader::maildir_basename_flags; +*maildir_path_flags = \&PublicInbox::MdirReader::maildir_path_flags; + +is(maildir_basename_flags('foo'), '', 'new valid name accepted'); +is(maildir_basename_flags('foo:2,'), '', 'cur valid name accepted'); +is(maildir_basename_flags('foo:2,bar'), 'bar', 'flags name accepted'); +is(maildir_basename_flags('.foo:2,bar'), undef, 'no hidden files'); +is(maildir_basename_flags('fo:o:2,bar'), undef, 'no extra colon'); +is(maildir_path_flags('/path/to/foo:2,S'), 'S', 'flag returned for path'); +is(maildir_path_flags('/path/to/.foo:2,S'), undef, 'no hidden paths'); +is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path'); + +# not sure if there's a better place for eml_from_path +use_ok 'PublicInbox::InboxWritable', qw(eml_from_path); +is(eml_from_path('.'), undef, 'eml_from_path fails on directory'); + +done_testing;
Using dashed keywords confuses the option parser without "=" signs (and bash completion doesn't yet work with "="). So use ":" instead of "-" as the prefix for internal ops, since ":" is just as unlikely to be the first character of an executable file in a user's $PATH. --- lib/PublicInbox/LEI.pm | 8 ++++---- lib/PublicInbox/LeiOverview.pm | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index e2a945a4..e29b13c3 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -227,9 +227,9 @@ my %OPTDESC = ( 'show threads|t' => 'display entire thread a message belongs to', 'q threads|t' => 'return all messages in the same threads as the actual match(es)', -'alert=s@' => ['CMD,-WINCH,-bell,<any command>', +'alert=s@' => ['CMD,:WINCH,:bell,<any command>', 'run command(s) or perform ops when done writing to output ' . - '(default: "-WINCH,-bell" with --mua and Maildir/IMAP output, ' . + '(default: ":WINCH,:bell" with --mua and Maildir/IMAP output, ' . 'nothing otherwise)' ], 'augment|a' => 'augment --output destination instead of clobbering', @@ -758,14 +758,14 @@ sub poke_mua { # forces terminal MUAs to wake up and hopefully notice new mail my ($self) = @_; my $alerts = $self->{opt}->{alert} // return; while (my $op = shift(@$alerts)) { - if ($op eq '-WINCH') { + if ($op eq ':WINCH') { # hit the process group that started the MUA if ($self->{sock}) { send($self->{sock}, '-WINCH', MSG_EOR); } elsif ($self->{oneshot}) { kill('-WINCH', $$); } - } elsif ($op eq '-bell') { + } elsif ($op eq ':bell') { out($self, "\a"); } elsif ($op =~ /(?<!\\),/) { # bare ',' (not ',,') push @$alerts, split(/(?<!\\),/, $op); diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 98c89d12..c820f0d7 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -100,7 +100,7 @@ sub new { return $lei->fail($@) if $@; if ($opt->{mua} && $lei->{l2m}->lock_free) { $lei->{early_mua} = 1; - $opt->{alert} //= [ '-WINCH,-bell' ] if -t $lei->{1}; + $opt->{alert} //= [ ':WINCH,:bell' ] if -t $lei->{1}; } } $self;
Perl 5.8.8/5.10.0+ can use fchdir(), and we depend on 5.10.1+ --- t/run.perl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/t/run.perl b/t/run.perl index d0b29e68..e8512e18 100755 --- a/t/run.perl +++ b/t/run.perl @@ -14,7 +14,6 @@ use strict; use v5.10.1; use IO::Handle; # ->autoflush use PublicInbox::TestCommon; -use Cwd qw(getcwd); use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use Errno qw(EINTR); use Fcntl qw(:seek); @@ -33,7 +32,7 @@ if (($ENV{TEST_RUN_MODE} // 2) == 0) { die "$0 is not compatible with TEST_RUN_MODE=0\n"; } my @tests = scalar(@ARGV) ? @ARGV : glob('t/*.t'); -my $cwd = getcwd(); +open my $cwd_fh, '<', '.' or die "open .: $!"; open my $OLDOUT, '>&STDOUT' or die "dup STDOUT: $!"; open my $OLDERR, '>&STDERR' or die "dup STDERR: $!"; $OLDOUT->autoflush(1); @@ -64,7 +63,7 @@ our ($worker, $worker_test); sub test_status () { $? = 255 if $? == 0 && !$tb->is_passing; my $status = $? ? 'not ok' : 'ok'; - chdir($cwd) or DIE "chdir($cwd): $!"; + chdir($cwd_fh) or DIE "fchdir: $!"; if ($log_suffix ne '') { my $log = $worker_test; $log =~ s/\.t\z/$log_suffix/;
The "#" is what TAP <https://testanything.org/> uses, which is also consistent with what our (and many other) test suites emit. --- lib/PublicInbox/LEI.pm | 6 +++--- t/lei.t | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index e29b13c3..5f265087 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -559,7 +559,7 @@ sub _lei_cfg ($;$) { open my $fh, '>>', $f or die "open($f): $!\n"; @st = stat($fh) or die "fstat($f): $!\n"; $cur_st = pack('dd', $st[10], $st[7]); - qerr($self, "I: $f created") if $self->{cmd} ne 'config'; + qerr($self, "# $f created") if $self->{cmd} ne 'config'; } my $cfg = PublicInbox::Config::git_config_dump($f); $cfg->{-st} = $cur_st; @@ -619,7 +619,7 @@ sub lei_init { my @cur = stat($cur) if defined($cur); $cur = File::Spec->canonpath($cur // $dir); my @dir = stat($dir); - my $exists = "I: leistore.dir=$cur already initialized" if @dir; + my $exists = "# leistore.dir=$cur already initialized" if @dir; if (@cur) { if ($cur eq $dir) { _lei_store($self, 1)->done; @@ -638,7 +638,7 @@ E: leistore.dir=$cur already initialized and it is not $dir } lei_config($self, 'leistore.dir', $dir); _lei_store($self, 1)->done; - $exists //= "I: leistore.dir=$dir newly initialized"; + $exists //= "# leistore.dir=$dir newly initialized"; return qerr($self, $exists); } diff --git a/t/lei.t b/t/lei.t index 8e771eb5..4785acca 100644 --- a/t/lei.t +++ b/t/lei.t @@ -49,7 +49,7 @@ my $test_help = sub { my $ok_err_info = sub { my ($msg) = @_; - is(grep(!/^I:/, split(/^/, $lei_err)), 0, $msg) or + is(grep(!/^#/, split(/^/, $lei_err)), 0, $msg) or diag "$msg: err=$lei_err"; };
DESTROY callbacks can clobber $?, so we must take care to preserve it when exiting. We'll also try to make an effort to ensure better DESTROY ordering and delete as much as possible before x_it finishes. We also need to load PublicInbox::Config when setting up public inboxes. --- lib/PublicInbox/IPC.pm | 2 ++ lib/PublicInbox/LEI.pm | 7 +++++-- lib/PublicInbox/TestCommon.pm | 3 ++- t/lei-mirror.t | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm index 9331233a..efac4c4d 100644 --- a/lib/PublicInbox/IPC.pm +++ b/lib/PublicInbox/IPC.pm @@ -412,9 +412,11 @@ sub DESTROY { my ($self) = @_; my $ppid = $self->{-wq_ppid}; wq_kill($self) if $ppid && $ppid == $$; + my $err = $?; wq_close($self); wq_wait_old($self); ipc_worker_stop($self); + $? = $err if $err; } sub detect_nproc () { diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 5f265087..dd831c54 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -336,8 +336,9 @@ sub x_it ($$) { my $wq = delete $self->{$f} or next; $wq->DESTROY; } - # cleanup anything that has tempfiles - delete @$self{qw(ovv dedupe)}; + # cleanup anything that has tempfiles or open file handles + %PATH2CFG = (); + delete @$self{qw(ovv dedupe sto cfg)}; if (my $signum = ($code & 127)) { # usually SIGPIPE (13) $SIG{PIPE} = 'DEFAULT'; # $SIG{$signum} doesn't work kill $signum, $$; @@ -1072,8 +1073,10 @@ sub DESTROY { my ($self) = @_; $self->{1}->autoflush(1) if $self->{1}; stop_pager($self); + my $err = $?; my $oneshot_pids = delete $self->{"pid.$self.$$"} or return; waitpid($_, 0) for keys %$oneshot_pids; + $? = $err if $err; # preserve ->fail or ->x_it code } 1; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 53f13437..63d45ac3 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -541,7 +541,6 @@ sub setup_public_inboxes () { my $end = $lk->lock_for_scope; return @ret if -f $stamp; - require PublicInbox::InboxWritable; local $ENV{PI_CONFIG} = $pi_config; for my $V (1, 2) { run_script([qw(-init), "-V$V", "t$V", @@ -549,6 +548,8 @@ sub setup_public_inboxes () { "$test_home/t$V", "http://example.com/t$V", "t$V\@example.com" ]) or BAIL_OUT "init v$V"; } + require PublicInbox::Config; + require PublicInbox::InboxWritable; my $cfg = PublicInbox::Config->new; my $seen = 0; $cfg->each_inbox(sub { diff --git a/t/lei-mirror.t b/t/lei-mirror.t index e3707979..cbe300da 100644 --- a/t/lei-mirror.t +++ b/t/lei-mirror.t @@ -27,7 +27,7 @@ test_lei({ tmpdir => $tmpdir }, sub { like($lei_out, qr!\Q$t2\E!, 't2 added to ls-externals'); ok(!$lei->('add-external', $t2, '--mirror', "$http/t2/"), - '--mirror fails if reused'); + '--mirror fails if reused') or diag "$lei_err.$lei_out = $?"; ok($lei->('ls-external'), 'ls-external'); like($lei_out, qr!\Q$t2\E!, 'still in ls-externals');