From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier
Date: Tue, 9 Feb 2021 07:09:31 -0100 [thread overview]
Message-ID: <20210209080937.4678-6-e@80x24.org> (raw)
In-Reply-To: <20210209080937.4678-1-e@80x24.org>
We'll do more requires in the top-level lei-daemon process to
save work in workers. We can also work towards aborting on
user errors in lei-daemon rather than worker processes.
"lei import -f mbox*" is finally tested inside t/lei_to_mail.t.
---
MANIFEST | 1 +
lib/PublicInbox/LeiImport.pm | 25 +++++++++++++++----------
lib/PublicInbox/LeiToMail.pm | 26 ++++++++++----------------
lib/PublicInbox/MdirReader.pm | 21 +++++++++++++++++++++
lib/PublicInbox/TestCommon.pm | 4 +++-
t/lei-import.t | 5 ++++-
t/lei_to_mail.t | 19 ++++++++++++++++---
7 files changed, 70 insertions(+), 31 deletions(-)
create mode 100644 lib/PublicInbox/MdirReader.pm
diff --git a/MANIFEST b/MANIFEST
index 7f417743..6b3fc812 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -199,6 +199,7 @@ lib/PublicInbox/ManifestJsGz.pm
lib/PublicInbox/Mbox.pm
lib/PublicInbox/MboxGz.pm
lib/PublicInbox/MboxReader.pm
+lib/PublicInbox/MdirReader.pm
lib/PublicInbox/MiscIdx.pm
lib/PublicInbox/MiscSearch.pm
lib/PublicInbox/MsgIter.pm
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index a63bfdfd..8358d9d4 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -6,7 +6,6 @@ package PublicInbox::LeiImport;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
-use PublicInbox::MboxReader;
use PublicInbox::Eml;
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::PktOp;
@@ -37,8 +36,17 @@ sub call { # the main "lei import" method
$lei->{opt}->{kw} //= 1;
my $fmt = $lei->{opt}->{'format'};
my $self = $lei->{imp} = bless {}, $cls;
- if (my @f = grep { -f } @argv && !$fmt) {
- return $lei->fail("--format unset for regular files:\n@f");
+ my @f;
+ for my $x (@argv) {
+ if (-f $x) { push @f, $x }
+ elsif (-d _) { require PublicInbox::MdirReader }
+ }
+ (@f && !$fmt) and
+ return $lei->fail("--format unset for regular file(s):\n@f");
+ if (@f && $fmt ne 'eml') {
+ require PublicInbox::MboxReader;
+ PublicInbox::MboxReader->can($fmt) or
+ return $lei->fail( "--format=$fmt unrecognized\n");
}
$self->{0} = $lei->{0} if $lei->{opt}->{stdin};
my $ops = {
@@ -83,11 +91,9 @@ error reading $x: $!
my $eml = PublicInbox::Eml->new(\$buf);
_import_eml($eml, $lei->{sto}, $set_kw);
- } else { # some mbox
- my $cb = PublicInbox::MboxReader->can($fmt);
- $cb or return $lei->child_error(1 >> 8, <<"");
---format $fmt unsupported for $x
-
+ } else { # some mbox (->can already checked in call);
+ my $cb = PublicInbox::MboxReader->can($fmt) //
+ die "BUG: bad fmt=$fmt";
$cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw);
}
};
@@ -109,8 +115,7 @@ unable to open $x: $!
_import_fh($lei, $fh, $x);
} elsif (-d _ && (-d "$x/cur" || -d "$x/new")) {
- require PublicInbox::LeiToMail;
- PublicInbox::LeiToMail::maildir_each_file($x,
+ PublicInbox::MdirReader::maildir_each_file($x,
\&_import_maildir,
$lei->{sto}, $lei->{opt}->{kw});
} else {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index a5a196db..e3e512be 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -18,6 +18,7 @@ use Symbol qw(gensym);
use IO::Handle; # ->autoflush
use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
use Errno qw(EEXIST ESPIPE ENOENT EPIPE);
+my ($maildir_each_file);
# struggles with short-lived repos, Gcf2Client makes little sense with lei;
# but we may use in-process libgit2 in the future.
@@ -266,18 +267,6 @@ sub _mbox_write_cb ($$) {
}
}
-sub maildir_each_file ($$;@) {
- my ($dir, $cb, @arg) = @_;
- $dir .= '/' unless substr($dir, -1) eq '/';
- for my $d (qw(new/ cur/)) {
- my $pfx = $dir.$d;
- opendir my $dh, $pfx or next;
- while (defined(my $fn = readdir($dh))) {
- $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
- }
- }
-}
-
sub _augment_file { # maildir_each_file cb
my ($f, $lei) = @_;
my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return;
@@ -354,11 +343,18 @@ sub new {
my $dst = $lei->{ovv}->{dst};
my $self = bless {}, $cls;
if ($fmt eq 'maildir') {
+ $maildir_each_file //= do {
+ require PublicInbox::MdirReader;
+ PublicInbox::MdirReader->can('maildir_each_file');
+ };
+ $lei->{opt}->{augment} and
+ require PublicInbox::InboxWritable; # eml_from_path
$self->{base_type} = 'maildir';
-e $dst && !-d _ and die
"$dst exists and is not a directory\n";
$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
} elsif (substr($fmt, 0, 4) eq 'mbox') {
+ require PublicInbox::MboxReader if $lei->{opt}->{augment};
(-d $dst || (-e _ && !-w _)) and die
"$dst exists and is not a writable file\n";
$self->can("eml2$fmt") or die "bad mbox --format=$fmt\n";
@@ -389,12 +385,11 @@ sub _do_augment_maildir {
if ($lei->{opt}->{augment}) {
my $dedupe = $lei->{dedupe};
if ($dedupe && $dedupe->prepare_dedupe) {
- require PublicInbox::InboxWritable; # eml_from_path
- maildir_each_file($dst, \&_augment_file, $lei);
+ $maildir_each_file->($dst, \&_augment_file, $lei);
$dedupe->pause_dedupe;
}
} else { # clobber existing Maildir
- maildir_each_file($dst, \&_unlink);
+ $maildir_each_file->($dst, \&_unlink);
}
}
@@ -435,7 +430,6 @@ sub _do_augment_mbox {
my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) :
dup_src($out);
my $fmt = $lei->{ovv}->{fmt};
- require PublicInbox::MboxReader;
PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
}
# maybe some systems don't honor O_APPEND, Perl does this:
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
new file mode 100644
index 00000000..c6a0e7a8
--- /dev/null
+++ b/lib/PublicInbox/MdirReader.pm
@@ -0,0 +1,21 @@
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Maildirs for now, MH eventually
+package PublicInbox::MdirReader;
+use strict;
+use v5.10.1;
+
+sub maildir_each_file ($$;@) {
+ my ($dir, $cb, @arg) = @_;
+ $dir .= '/' unless substr($dir, -1) eq '/';
+ for my $d (qw(new/ cur/)) {
+ my $pfx = $dir.$d;
+ opendir my $dh, $pfx or next;
+ while (defined(my $fn = readdir($dh))) {
+ $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
+ }
+ }
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index ec9191b6..53f13437 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -14,7 +14,7 @@ BEGIN {
@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
run_script start_script key2sub xsys xsys_e xqx eml_load tick
have_xapian_compact json_utf8 setup_public_inboxes
- tcp_host_port test_lei $lei $lei_out $lei_err $lei_opt);
+ tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt);
require Test::More;
my @methods = grep(!/\W/, @Test::More::EXPORT);
eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
@@ -457,6 +457,8 @@ our $lei = sub {
$res;
};
+sub lei (@) { $lei->(@_) }
+
sub json_utf8 () {
state $x = ref(PublicInbox::Config->json)->new->utf8->canonical;
}
diff --git a/t/lei-import.t b/t/lei-import.t
index 709d89fa..b691798a 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -3,12 +3,14 @@
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use strict; use v5.10.1; use PublicInbox::TestCommon;
test_lei(sub {
+ok(!$lei->(qw(import -f bogus), 't/plack-qp.eml'), 'fails with bogus format');
+like($lei_err, qr/\bbogus unrecognized/, 'gave error message');
ok($lei->(qw(q s:boolean)), 'search miss before import');
unlike($lei_out, qr/boolean/i, 'no results, yet');
open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!;
ok($lei->([qw(import -f eml -)], undef, { %$lei_opt, 0 => $fh }),
- 'import single file from stdin');
+ 'import single file from stdin') or diag $lei_err;
close $fh;
ok($lei->(qw(q s:boolean)), 'search hit after import');
ok($lei->(qw(import -f eml), 't/data/message_embed.eml'),
@@ -35,5 +37,6 @@ $res = json_utf8->decode($lei_out);
is($res->[1], undef, 'only one result');
is_deeply($res->[0]->{kw}, [], 'no keywords set');
+# see t/lei_to_mail.t for "import -f mbox*"
});
done_testing;
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index a25795ca..77e9902e 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -10,6 +10,7 @@ use Fcntl qw(SEEK_SET);
use PublicInbox::Spawn qw(popen_rd which);
use List::Util qw(shuffle);
require_mods(qw(DBD::SQLite));
+require PublicInbox::MdirReader;
require PublicInbox::MboxReader;
require PublicInbox::LeiOverview;
require PublicInbox::LEI;
@@ -127,6 +128,17 @@ my $orig = do {
is(do { local $/; <$fh> }, $raw, 'jobs > 1');
$raw;
};
+
+test_lei(sub {
+ ok(lei(qw(import -f), $mbox, $fn), 'imported mbox');
+ ok(lei(qw(q s:x)), 'lei q works') or diag $lei_err;
+ my $res = json_utf8->decode($lei_out);
+ my $x = $res->[0];
+ is($x->{'s'}, 'x', 'subject imported') or diag $lei_out;
+ is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out;
+ is($res->[1], undef, 'only one result');
+});
+
for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma?
my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd');
SKIP: {
@@ -230,6 +242,7 @@ SKIP: { # FIFO support
}
{ # Maildir support
+ my $each_file = PublicInbox::MdirReader->can('maildir_each_file');
my $md = "$tmpdir/maildir/";
my $wcb = $wcb_get->('maildir', $md);
is(ref($wcb), 'CODE', 'got Maildir callback');
@@ -237,7 +250,7 @@ SKIP: { # FIFO support
$wcb->(\(my $x = $buf), $b4dc0ffee);
my @f;
- PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
+ $each_file->($md, sub { push @f, shift });
open my $fh, $f[0] or BAIL_OUT $!;
is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
@@ -246,7 +259,7 @@ SKIP: { # FIFO support
$wcb->(\($x = $buf."\nx\n"), $deadcafe);
my @x = ();
- PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift });
+ $each_file->($md, sub { push @x, shift });
is(scalar(@x), 1, 'wrote one new file');
ok(!-f $f[0], 'old file clobbered');
open $fh, $x[0] or BAIL_OUT $!;
@@ -257,7 +270,7 @@ SKIP: { # FIFO support
$wcb->(\($x = $buf."\ny\n"), $deadcafe);
$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
@f = ();
- PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
+ $each_file->($md, sub { push @f, shift });
is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
my @new = grep(!/\A\Q$x[0]\E\z/, @f);
is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');
next prev parent reply other threads:[~2021-02-09 8:09 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-09 8:09 [PATCH 00/11] Maildir code consolidation, test updates Eric Wong
2021-02-09 8:09 ` [PATCH 01/11] t/thread-index-gap.t: avoid unnecessary map Eric Wong
2021-02-09 8:09 ` [PATCH 02/11] test_common: disable fsync on the CLI where possible Eric Wong
2021-02-09 8:09 ` [PATCH 03/11] t/cgi.t: modernizations and style updates Eric Wong
2021-02-09 8:09 ` [PATCH 04/11] git: ->qx: respect caller's $/ in array context Eric Wong
2021-02-09 8:09 ` Eric Wong [this message]
2021-02-09 8:09 ` [PATCH 06/11] t/run.perl: fix for >128 tests Eric Wong
2021-02-09 8:09 ` [PATCH 07/11] use MdirReader in -watch and InboxWritable Eric Wong
2021-02-09 8:09 ` [PATCH 08/11] lei q: prefix --alert ops with ':' instead of '-' Eric Wong
2021-02-09 8:09 ` [PATCH 09/11] t/run.perl: drop Cwd dependency Eric Wong
2021-02-09 8:09 ` [PATCH 10/11] lei: replace "I:"-prefixed info messages with "#" Eric Wong
2021-02-09 8:09 ` [PATCH 11/11] tests|lei: fixes for TEST_RUN_MODE=0 and lei oneshot Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210209080937.4678-6-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).