These patches affect the lei convert, import, and mark sub-commands. Eric Wong (3): lei_input: avoid special case sub for --stdin lei: use IO::Uncompress::Gunzip MultiStream lei_input: treat ".eml" and ".patch" suffix as "eml" lib/PublicInbox/LeiConvert.pm | 1 - lib/PublicInbox/LeiImport.pm | 1 - lib/PublicInbox/LeiInput.pm | 33 +++++++++++++++++++-------------- lib/PublicInbox/LeiMark.pm | 1 - lib/PublicInbox/LeiRemote.pm | 2 +- lib/PublicInbox/LeiXSearch.pm | 2 +- t/lei-import.t | 2 +- t/lei-mark.t | 4 ++-- 8 files changed, 24 insertions(+), 22 deletions(-)
We can consistently open /dev/stdin correctly nowadays, so drop the input_stdin and just use the normal ->path_to_fd code path. --- lib/PublicInbox/LeiConvert.pm | 1 - lib/PublicInbox/LeiImport.pm | 1 - lib/PublicInbox/LeiInput.pm | 8 +------- lib/PublicInbox/LeiMark.pm | 1 - 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm index 5d0adb14..da3b50cc 100644 --- a/lib/PublicInbox/LeiConvert.pm +++ b/lib/PublicInbox/LeiConvert.pm @@ -34,7 +34,6 @@ sub input_maildir_cb { sub do_convert { # via wq_do my ($self) = @_; - $self->input_stdin; for my $input (@{$self->{inputs}}) { $self->input_path_url($input); } diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 803b5cda..227a2a21 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -78,7 +78,6 @@ sub lei_import { # the main "lei import" method $self->{-wq_nr_workers} = $j // 1; # locked my ($op_c, undef) = $lei->workers_start($self, 'lei_import', $j, $ops); $lei->{imp} = $self; - $self->wq_io_do('input_stdin', []) if $self->{0}; net_merge_complete($self) unless $lei->{auth}; $op_c->op_wait_event($ops); } diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index d916249a..93284e8b 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -46,12 +46,6 @@ error reading $name: $! 
} } -sub input_stdin { - my ($self) = @_; - my $in = delete $self->{0} or return; - $self->input_fh($self->{lei}->{opt}->{'in-format'}, $in, '<stdin>'); -} - sub input_path_url { my ($self, $input, @args) = @_; my $lei = $self->{lei}; @@ -94,7 +88,7 @@ sub prepare_inputs { # returns undef on error @$inputs and return $lei->fail("--stdin and @$inputs do not mix"); check_input_format($lei) or return; - $self->{0} = $lei->{0}; + push @$inputs, '/dev/stdin'; } my $net = $lei->{net}; # NetWriter may be created by l2m my $fmt = $lei->{opt}->{'in-format'}; diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm index 6e611318..b187d6e7 100644 --- a/lib/PublicInbox/LeiMark.pm +++ b/lib/PublicInbox/LeiMark.pm @@ -118,7 +118,6 @@ sub lei_mark { # the "lei mark" method $self->{vmd_mod} = $vmd_mod; my ($op_c, undef) = $lei->workers_start($self, 'lei_mark', 1, $ops); $lei->{mark} = $self; - $self->wq_io_do('input_stdin', []) if $self->{0}; net_merge_complete($self) unless $lei->{auth}; $op_c->op_wait_event($ops); }
This is compatible with default gunzip(1) behavior and future-proofs us against potential changes in PublicInbox::WWW to save memory on public-inbox-httpd instances. --- lib/PublicInbox/LeiRemote.pm | 2 +- lib/PublicInbox/LeiXSearch.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm index 399fc936..945d9990 100644 --- a/lib/PublicInbox/LeiRemote.pm +++ b/lib/PublicInbox/LeiRemote.pm @@ -50,7 +50,7 @@ sub mset { my ($fh, $pid) = popen_rd($cmd, undef, $rdr); my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); $self->{smsg} = []; - $fh = IO::Uncompress::Gunzip->new($fh); + $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self); my $err = waitpid($pid, 0) == $pid ? undef : "BUG: waitpid($cmd): $!"; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 1a194f1c..f3b8cc25 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -272,7 +272,7 @@ sub query_remote_mboxrd { $lei->qerr("# $cmd"); my ($fh, $pid) = popen_rd($cmd, undef, $rdr); $reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid); - $fh = IO::Uncompress::Gunzip->new($fh); + $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1); PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self, $lei, $each_smsg); my $err = waitpid($pid, 0) == $pid ? undef
".eml" is a suffix supported by (/usr/local)/etc/mime.types on Debian and FreeBSD systems using the "mime-support" package. ".patch" is what "git format-patch" generates by default since git v1.5.0 in 2007. --- lib/PublicInbox/LeiInput.pm | 25 ++++++++++++++++++------- t/lei-import.t | 2 +- t/lei-mark.t | 4 ++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 93284e8b..c04fc2f8 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -14,9 +14,9 @@ sub check_input_format ($;$) { my $err = $files ? "regular file(s):\n@$files" : '--stdin'; return $lei->fail("--$opt_key unset for $err"); } + return 1 if $fmt eq 'eml'; require PublicInbox::MboxLock if $files; require PublicInbox::MboxReader; - return 1 if $fmt eq 'eml'; # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail PublicInbox::MboxReader->reads($fmt) or return $lei->fail("--$opt_key=$fmt unrecognized"); @@ -28,7 +28,6 @@ sub check_input_format ($;$) { sub input_fh { my ($self, $ifmt, $fh, $name, @args) = @_; if ($ifmt eq 'eml') { - require PublicInbox::Eml; my $buf = do { local $/; <$fh> } // return $self->{lei}->child_error(1 << 8, <<""); error reading $name: $! @@ -60,13 +59,21 @@ sub input_path_url { $self, @args); return; } - $input =~ s!\A([a-z0-9]+):!!i and $ifmt = lc($1); + if ($input =~ s!\A([a-z0-9]+):!!i) { + $ifmt = lc($1); + } elsif ($input =~ /\.(?:patch|eml)\z/i) { + $ifmt = 'eml'; + } my $devfd = $lei->path_to_fd($input) // return; if ($devfd >= 0) { $self->input_fh($ifmt, $lei->{$devfd}, $input, @args); - } elsif (-f $input) { - my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? 
['none'] : - PublicInbox::MboxLock->defaults); + } elsif (-f $input && $ifmt eq 'eml') { + open my $fh, '<', $input or + return $lei->fail("open($input): $!"); + $self->input_fh($ifmt, $fh, $input, @args); + } elsif (-f _) { + my $m = $lei->{opt}->{'lock'} // + PublicInbox::MboxLock->defaults; my $mbl = PublicInbox::MboxLock->acq($input, 0, $m); $self->input_fh($ifmt, $mbl->{fh}, $input, @args); } elsif (-d _ && (-d "$input/cur" || -d "$input/new")) { @@ -91,7 +98,6 @@ sub prepare_inputs { # returns undef on error push @$inputs, '/dev/stdin'; } my $net = $lei->{net}; # NetWriter may be created by l2m - my $fmt = $lei->{opt}->{'in-format'}; my (@f, @d); # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX for my $input (@$inputs) { @@ -120,6 +126,11 @@ sub prepare_inputs { # returns undef on error } else { return $lei->fail("Unable to handle $input"); } + } elsif ($input =~ /\.(eml|patch)\z/i && -f $input) { + lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<""); +$input is `eml', not --in-format=$in_fmt + + require PublicInbox::Eml; } else { my $devfd = $lei->path_to_fd($input) // return; if ($devfd >= 0 || -f $input || -p _) { diff --git a/t/lei-import.t b/t/lei-import.t index 33ce490d..99289748 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -4,7 +4,7 @@ use strict; use v5.10.1; use PublicInbox::TestCommon; test_lei(sub { ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format'); -like($lei_err, qr/\bbogus unrecognized/, 'gave error message'); +like($lei_err, qr/\bis `eml', not --in-format/, 'gave error message'); lei_ok(qw(q s:boolean), \'search miss before import'); unlike($lei_out, qr/boolean/i, 'no results, yet'); diff --git a/t/lei-mark.t b/t/lei-mark.t index 7855839e..98652c85 100644 --- a/t/lei-mark.t +++ b/t/lei-mark.t @@ -26,8 +26,8 @@ my $check_kw = sub { test_lei(sub { lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet'); - lei_ok(qw(import -F eml t/utf8.eml)); - lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged 
+L:urgent)); + lei_ok(qw(import t/utf8.eml)); + lei_ok(qw(mark t/utf8.eml +kw:flagged +L:urgent)); $check_kw->(['flagged'], L => ['urgent']); lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found'); ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected');