* [PATCH 1/3] lei_input: avoid special case sub for --stdin
2021-03-29 7:08 [PATCH 0/3] lei input improvements Eric Wong
@ 2021-03-29 7:08 ` Eric Wong
2021-03-29 7:08 ` [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream Eric Wong
2021-03-29 7:08 ` [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml" Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29 7:08 UTC (permalink / raw)
To: meta
We can consistently open /dev/stdin correctly nowadays, so
drop the input_stdin sub and just use the normal ->path_to_fd
code path for --stdin.
---
lib/PublicInbox/LeiConvert.pm | 1 -
lib/PublicInbox/LeiImport.pm | 1 -
lib/PublicInbox/LeiInput.pm | 8 +-------
lib/PublicInbox/LeiMark.pm | 1 -
4 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 5d0adb14..da3b50cc 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -34,7 +34,6 @@ sub input_maildir_cb {
sub do_convert { # via wq_do
my ($self) = @_;
- $self->input_stdin;
for my $input (@{$self->{inputs}}) {
$self->input_path_url($input);
}
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 803b5cda..227a2a21 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -78,7 +78,6 @@ sub lei_import { # the main "lei import" method
$self->{-wq_nr_workers} = $j // 1; # locked
my ($op_c, undef) = $lei->workers_start($self, 'lei_import', $j, $ops);
$lei->{imp} = $self;
- $self->wq_io_do('input_stdin', []) if $self->{0};
net_merge_complete($self) unless $lei->{auth};
$op_c->op_wait_event($ops);
}
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index d916249a..93284e8b 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -46,12 +46,6 @@ error reading $name: $!
}
}
-sub input_stdin {
- my ($self) = @_;
- my $in = delete $self->{0} or return;
- $self->input_fh($self->{lei}->{opt}->{'in-format'}, $in, '<stdin>');
-}
-
sub input_path_url {
my ($self, $input, @args) = @_;
my $lei = $self->{lei};
@@ -94,7 +88,7 @@ sub prepare_inputs { # returns undef on error
@$inputs and return
$lei->fail("--stdin and @$inputs do not mix");
check_input_format($lei) or return;
- $self->{0} = $lei->{0};
+ push @$inputs, '/dev/stdin';
}
my $net = $lei->{net}; # NetWriter may be created by l2m
my $fmt = $lei->{opt}->{'in-format'};
diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm
index 6e611318..b187d6e7 100644
--- a/lib/PublicInbox/LeiMark.pm
+++ b/lib/PublicInbox/LeiMark.pm
@@ -118,7 +118,6 @@ sub lei_mark { # the "lei mark" method
$self->{vmd_mod} = $vmd_mod;
my ($op_c, undef) = $lei->workers_start($self, 'lei_mark', 1, $ops);
$lei->{mark} = $self;
- $self->wq_io_do('input_stdin', []) if $self->{0};
net_merge_complete($self) unless $lei->{auth};
$op_c->op_wait_event($ops);
}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream
2021-03-29 7:08 [PATCH 0/3] lei input improvements Eric Wong
2021-03-29 7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
@ 2021-03-29 7:08 ` Eric Wong
2021-03-29 7:08 ` [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml" Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29 7:08 UTC (permalink / raw)
To: meta
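MultiStream makes IO::Uncompress::Gunzip read all concatenated
gzip streams in the input as a single stream.
This is compatible with default gunzip(1) behavior and
future-proofs us against potential changes in PublicInbox::WWW
to save memory on public-inbox-httpd instances.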
---
lib/PublicInbox/LeiRemote.pm | 2 +-
lib/PublicInbox/LeiXSearch.pm | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
index 399fc936..945d9990 100644
--- a/lib/PublicInbox/LeiRemote.pm
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -50,7 +50,7 @@ sub mset {
my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
$self->{smsg} = [];
- $fh = IO::Uncompress::Gunzip->new($fh);
+ $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self);
my $err = waitpid($pid, 0) == $pid ? undef
: "BUG: waitpid($cmd): $!";
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 1a194f1c..f3b8cc25 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -272,7 +272,7 @@ sub query_remote_mboxrd {
$lei->qerr("# $cmd");
my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
$reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid);
- $fh = IO::Uncompress::Gunzip->new($fh);
+ $fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
$lei, $each_smsg);
my $err = waitpid($pid, 0) == $pid ? undef
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml"
2021-03-29 7:08 [PATCH 0/3] lei input improvements Eric Wong
2021-03-29 7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
2021-03-29 7:08 ` [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream Eric Wong
@ 2021-03-29 7:08 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29 7:08 UTC (permalink / raw)
To: meta
".eml" is a suffix supported by (/usr/local)/etc/mime.types
on Debian and FreeBSD systems using the "mime-support" package.
".patch" is what "git format-patch" generates by default since
git v1.5.0 in 2007.
---
lib/PublicInbox/LeiInput.pm | 25 ++++++++++++++++++-------
t/lei-import.t | 2 +-
t/lei-mark.t | 4 ++--
3 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 93284e8b..c04fc2f8 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -14,9 +14,9 @@ sub check_input_format ($;$) {
my $err = $files ? "regular file(s):\n@$files" : '--stdin';
return $lei->fail("--$opt_key unset for $err");
}
+ return 1 if $fmt eq 'eml';
require PublicInbox::MboxLock if $files;
require PublicInbox::MboxReader;
- return 1 if $fmt eq 'eml';
# XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
PublicInbox::MboxReader->reads($fmt) or
return $lei->fail("--$opt_key=$fmt unrecognized");
@@ -28,7 +28,6 @@ sub check_input_format ($;$) {
sub input_fh {
my ($self, $ifmt, $fh, $name, @args) = @_;
if ($ifmt eq 'eml') {
- require PublicInbox::Eml;
my $buf = do { local $/; <$fh> } //
return $self->{lei}->child_error(1 << 8, <<"");
error reading $name: $!
@@ -60,13 +59,21 @@ sub input_path_url {
$self, @args);
return;
}
- $input =~ s!\A([a-z0-9]+):!!i and $ifmt = lc($1);
+ if ($input =~ s!\A([a-z0-9]+):!!i) {
+ $ifmt = lc($1);
+ } elsif ($input =~ /\.(?:patch|eml)\z/i) {
+ $ifmt = 'eml';
+ }
my $devfd = $lei->path_to_fd($input) // return;
if ($devfd >= 0) {
$self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
- } elsif (-f $input) {
- my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
- PublicInbox::MboxLock->defaults);
+ } elsif (-f $input && $ifmt eq 'eml') {
+ open my $fh, '<', $input or
+ return $lei->fail("open($input): $!");
+ $self->input_fh($ifmt, $fh, $input, @args);
+ } elsif (-f _) {
+ my $m = $lei->{opt}->{'lock'} //
+ PublicInbox::MboxLock->defaults;
my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
$self->input_fh($ifmt, $mbl->{fh}, $input, @args);
} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
@@ -91,7 +98,6 @@ sub prepare_inputs { # returns undef on error
push @$inputs, '/dev/stdin';
}
my $net = $lei->{net}; # NetWriter may be created by l2m
- my $fmt = $lei->{opt}->{'in-format'};
my (@f, @d);
# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
for my $input (@$inputs) {
@@ -120,6 +126,11 @@ sub prepare_inputs { # returns undef on error
} else {
return $lei->fail("Unable to handle $input");
}
+ } elsif ($input =~ /\.(eml|patch)\z/i && -f $input) {
+ lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+ require PublicInbox::Eml;
} else {
my $devfd = $lei->path_to_fd($input) // return;
if ($devfd >= 0 || -f $input || -p _) {
diff --git a/t/lei-import.t b/t/lei-import.t
index 33ce490d..99289748 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -4,7 +4,7 @@
use strict; use v5.10.1; use PublicInbox::TestCommon;
test_lei(sub {
ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
-like($lei_err, qr/\bbogus unrecognized/, 'gave error message');
+like($lei_err, qr/\bis `eml', not --in-format/, 'gave error message');
lei_ok(qw(q s:boolean), \'search miss before import');
unlike($lei_out, qr/boolean/i, 'no results, yet');
diff --git a/t/lei-mark.t b/t/lei-mark.t
index 7855839e..98652c85 100644
--- a/t/lei-mark.t
+++ b/t/lei-mark.t
@@ -26,8 +26,8 @@ my $check_kw = sub {
test_lei(sub {
lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet');
- lei_ok(qw(import -F eml t/utf8.eml));
- lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged +L:urgent));
+ lei_ok(qw(import t/utf8.eml));
+ lei_ok(qw(mark t/utf8.eml +kw:flagged +L:urgent));
$check_kw->(['flagged'], L => ['urgent']);
lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found');
ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected');
^ permalink raw reply related [flat|nested] 4+ messages in thread