* [PATCH 1/9] drop dependency on File::Path::Expand
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 2/9] t/feed.t: make IPC::Run usage optional Eric Wong
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
We still pull it in via Email::LocalDelivery, but that
dependency will go away, soon.
---
INSTALL | 1 -
Makefile.PL | 1 -
lib/PublicInbox/Config.pm | 3 +--
script/public-inbox-mda | 4 +---
4 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/INSTALL b/INSTALL
index ca74c1c..25cc3c9 100644
--- a/INSTALL
+++ b/INSTALL
@@ -32,7 +32,6 @@ Requirements (server MDA)
- Email::MIME libemail-mime-perl
- Email::MIME::ContentType libemail-mime-contenttype-perl
- Encode::MIME::Header perl
- - File::Path::Expand libfile-path-expand-perl
- IPC::Run libipc-run-perl
Optional modules:
diff --git a/Makefile.PL b/Makefile.PL
index 61cb77b..4f25312 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -23,7 +23,6 @@ WriteMakefile(
'Email::MIME::ContentType' => 0,
'Email::Simple' => 0,
'Encode::MIME::Header' => 0,
- 'File::Path::Expand' => 0,
'IPC::Run' => 0,
'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
'Plack' => 0,
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index a8c5105..4651861 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -7,7 +7,6 @@ use strict;
use warnings;
require PublicInbox::Inbox;
use PublicInbox::Spawn qw(popen_rd);
-use File::Path::Expand qw/expand_filename/;
# returns key-value pairs of config directives in a hash
# if keys may be multi-value, the value is an array ref containing all values
@@ -82,7 +81,7 @@ sub get {
$self->{"publicinbox.$inbox.$key"};
}
-sub config_dir { $ENV{PI_DIR} || expand_filename('~/.public-inbox') }
+sub config_dir { $ENV{PI_DIR} || "$ENV{HOME}/.public-inbox" }
sub default_file {
my $f = $ENV{PI_CONFIG};
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index bb78c4e..84219ac 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -11,7 +11,6 @@ use Email::Filter;
use Email::MIME;
use Email::MIME::ContentType;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
-use File::Path::Expand qw/expand_filename/;
use IPC::Run qw(run);
use PublicInbox::MDA;
use PublicInbox::Filter;
@@ -22,8 +21,7 @@ use PublicInbox::Git;
# n.b: hopefully we can setup the emergency path without bailing due to
# user error, we really want to setup the emergency destination ASAP
# in case there's bugs in our code or user error.
-my $emergency = $ENV{PI_EMERGENCY} || '~/.public-inbox/emergency/';
-$emergency = expand_filename($emergency);
+my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
# this reads the message from stdin
my $filter = Email::Filter->new(emergency => $emergency);
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/9] t/feed.t: make IPC::Run usage optional
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
2016-06-15 0:37 ` [PATCH 1/9] drop dependency on File::Path::Expand Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 3/9] learn: remove IPC::Run dependency Eric Wong
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
Since ssoma is optional, here, IPC::Run shall also be optional.
(And it may be removed entirely in the future).
---
t/feed.t | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/t/feed.t b/t/feed.t
index ab92039..26c4bce 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -8,7 +8,6 @@ use PublicInbox::Feed;
use PublicInbox::Git;
use PublicInbox::Import;
use PublicInbox::Config;
-use IPC::Run qw/run/;
use File::Temp qw/tempdir/;
my $have_xml_feed = eval { require XML::Feed; 1 };
require 't/common.perl';
@@ -22,6 +21,8 @@ sub string_feed {
my %SSOMA;
sub rand_use ($) {
return 0 if $ENV{FAST};
+ eval { require IPC::Run };
+ return 0 if $@;
my $cmd = $_[0];
my $x = $SSOMA{$cmd};
unless ($x) {
@@ -80,7 +81,7 @@ EOF
if (rand_use('ssoma-mda')) {
$im->done;
my $str = $mime->as_string;
- run(['ssoma-mda', $git_dir], \$str) or
+ IPC::Run::run(['ssoma-mda', $git_dir], \$str) or
die "mda failed: $?\n";
} else {
like($im->add($mime), qr/\A:\d+/, 'added');
@@ -123,14 +124,9 @@ Date: Thu, 01 Jan 1970 00:00:00 +0000
EOF
if (rand_use('ssoma-mda')) {
- my $pid = open(my $pipe, "|-");
- defined $pid or die "fork/pipe failed: $!";
- if ($pid == 0) {
- exec("ssoma-mda", $git_dir);
- }
-
- print $pipe $spam->as_string or die "print failed: $!";
- close $pipe or die "close pipe failed: $!";
+ my $str = $spam->as_string;
+ IPC::Run::run(['ssoma-mda', $git_dir], \$str) or
+ die "mda failed: $?\n";
} else {
$im->add($spam);
$im->done;
@@ -155,7 +151,7 @@ EOF
# nuke spam
if (rand_use('ssoma-rm')) {
my $spam_str = $spam->as_string;
- run(["ssoma-rm", $git_dir], \$spam_str) or
+ IPC::Run::run(["ssoma-rm", $git_dir], \$spam_str) or
die "ssoma-rm failed: $?\n";
} else {
$im->remove($spam);
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/9] learn: remove IPC::Run dependency
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
2016-06-15 0:37 ` [PATCH 1/9] drop dependency on File::Path::Expand Eric Wong
2016-06-15 0:37 ` [PATCH 2/9] t/feed.t: make IPC::Run usage optional Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 4/9] t/mda.t: remove senseless use of Email::Filter Eric Wong
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
We'll be relying on our spawn implementation, for now;
since it'll be consistent with the rest of our code and
can optionally take advantage of vfork.
---
script/public-inbox-learn | 36 ++++++++++++++++++++++++------------
1 file changed, 24 insertions(+), 12 deletions(-)
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index bfbf023..783cf03 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -14,17 +14,35 @@ use Email::MIME;
use Email::MIME::ContentType;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
use PublicInbox::Address;
-use IPC::Run qw/run/;
+use PublicInbox::Spawn qw(spawn);
my $train = shift or die "usage: $usage\n";
if ($train !~ /\A(?:ham|spam)\z/) {
die "`$train' not recognized.\nusage: $usage\n";
}
my $pi_config = PublicInbox::Config->new;
+my $err;
my $mime = Email::MIME->new(eval {
local $/;
my $data = scalar <STDIN>;
$data =~ s/\AFrom [^\r\n]*\r?\n//s;
+ eval {
+ my @cmd = (qw(spamc -L), $train);
+ my ($r, $w);
+ pipe($r, $w) or die "pipe failed: $!";
+ open my $null, '>', '/dev/null' or
+ die "failed to open /dev/null: $!";
+ my $nullfd = fileno($null);
+ my %rdr = (0 => fileno($r), 1 => $nullfd, 2 => $nullfd);
+ my $pid = spawn(\@cmd, undef, \%rdr);
+ close $null;
+ close $r or die "close \$r failed: $!";
+ print $w $data or die "print \$w failed: $!";
+ close $w or die "close \$w failed: $!";
+ waitpid($pid, 0);
+ die "spamc failed with: $?\n" if $?;
+ };
+ $err = $@;
$data
});
@@ -43,14 +61,10 @@ if ($train eq "ham") {
PublicInbox::Filter->run($mime);
}
-my $err = 0;
-my @output = qw(> /dev/null > /dev/null);
-
# n.b. message may be cross-posted to multiple public-inboxes
foreach my $recipient (keys %dests) {
my $dst = $pi_config->lookup($recipient) or next;
my $git_dir = $dst->{mainrepo} or next;
- my ($out, $err) = ("", "");
my $git = PublicInbox::Git->new($git_dir);
# We do not touch GIT_COMMITTER_* env here so we can track
# who trained the message.
@@ -74,15 +88,13 @@ foreach my $recipient (keys %dests) {
$im->add($mime);
}
$im->done;
- my $in = $mime->as_string;
- if (!run([qw(spamc -L), $train], \$in, @output)) {
- $err = 1;
- }
-
- $err or eval {
+ eval {
require PublicInbox::SearchIdx;
PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
};
}
-exit $err;
+if ($err) {
+ warn $err;
+ exit 1;
+}
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/9] t/mda.t: remove senseless use of Email::Filter
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (2 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 3/9] learn: remove IPC::Run dependency Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 5/9] t/mda: use only Maildir for testing Eric Wong
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
Totally unnecessary...
---
t/mda.t | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/t/mda.t b/t/mda.t
index fdba967..66ba859 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -4,7 +4,6 @@ use strict;
use warnings;
use Test::More;
use Email::MIME;
-use Email::Filter;
use File::Temp qw/tempdir/;
use Cwd;
use IPC::Run qw(run);
@@ -54,8 +53,7 @@ local $ENV{GIT_COMMITTER_NAME} = eval {
open(my $fh, '<', $mbox) or die "failed to open mbox: $mbox\n";
my $str = eval { local $/; <$fh> };
close $fh;
- my $msg = Email::Filter->new(data => $str);
- $msg = Email::MIME->new($msg->simple->as_string);
+ my $msg = Email::MIME->new($str);
my $from = $msg->header('From');
my $author = PublicInbox::Address::from_name($from);
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 5/9] t/mda: use only Maildir for testing
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (3 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 4/9] t/mda.t: remove senseless use of Email::Filter Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 6/9] mda: precheck no longer depends on Email::Filter Eric Wong
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
Remove mbox tests since mbox is unreliable due to raciness
and incompatible implementations. We will drop support for
mbox emergency destinations, soon.
---
t/mda.t | 75 +++++++++++------------------------------------------------------
1 file changed, 12 insertions(+), 63 deletions(-)
diff --git a/t/mda.t b/t/mda.t
index 66ba859..3456699 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -22,7 +22,7 @@ my $fail_bin = getcwd()."/t/fail-bin";
my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
my $addr = 'test-public@example.com';
my $cfgpfx = "publicinbox.test";
-my $failbox = "$home/fail.mbox";
+my $faildir = "$home/faildir/";
my $mime;
{
@@ -72,7 +72,7 @@ die $@ if $@;
{
my $good_rev;
- local $ENV{PI_EMERGENCY} = $failbox;
+ local $ENV{PI_EMERGENCY} = $faildir;
local $ENV{HOME} = $home;
local $ENV{ORIGINAL_RECIPIENT} = $addr;
my $simple = Email::Simple->new(<<EOF);
@@ -103,12 +103,14 @@ EOF
# ensure failures work, fail with bad spamc
{
- ok(!-e $failbox, "nothing in PI_EMERGENCY before");
+ my @prev = <$faildir/new/*>;
+ is(scalar @prev, 0 , "nothing in PI_EMERGENCY before");
local $ENV{PATH} = $fail_path;
run([$mda], \$in);
my @revs = `git --git-dir=$maindir rev-list HEAD`;
is(scalar @revs, 1, "bad revision not committed");
- ok(-s $failbox > 0, "PI_EMERGENCY is written to");
+ my @new = <$faildir/new/*>;
+ is(scalar @new, 1, "PI_EMERGENCY is written to");
}
fail_bad_header($good_rev, "bad recipient", <<"");
@@ -158,7 +160,7 @@ Date: deadbeef
# spam training
{
- local $ENV{PI_EMERGENCY} = $failbox;
+ local $ENV{PI_EMERGENCY} = $faildir;
local $ENV{HOME} = $home;
local $ENV{ORIGINAL_RECIPIENT} = $addr;
local $ENV{PATH} = $main_path;
@@ -193,7 +195,7 @@ EOF
# train ham message
{
- local $ENV{PI_EMERGENCY} = $failbox;
+ local $ENV{PI_EMERGENCY} = $faildir;
local $ENV{HOME} = $home;
local $ENV{ORIGINAL_RECIPIENT} = $addr;
local $ENV{PATH} = $main_path;
@@ -263,72 +265,19 @@ EOF
}
}
-# faildir - emergency destination is maildir
-{
- my $faildir= "$home/faildir/";
- local $ENV{PI_EMERGENCY} = $faildir;
- local $ENV{HOME} = $home;
- local $ENV{ORIGINAL_RECIPIENT} = $addr;
- local $ENV{PATH} = $fail_path;
- my $in = <<EOF;
-From: Faildir <faildir\@example.com>
-To: You <you\@example.com>
-Cc: $addr
-Message-ID: <faildir\@example.com>
-Subject: faildir subject
-Date: Thu, 01 Jan 1970 00:00:00 +0000
-
-EOF
- run([$mda], \$in);
- ok(-d $faildir, "emergency exists");
- my @new = glob("$faildir/new/*");
- is(scalar(@new), 1, "message delivered");
- is(unlink(@new), 1, "removed emergency message");
-
- local $ENV{PATH} = $main_path;
- $in = <<EOF;
-From: Faildir <faildir\@example.com>
-To: $addr
-Content-Type: text/html
-Message-ID: <faildir\@example.com>
-Subject: faildir subject
-Date: Thu, 01 Jan 1970 00:00:00 +0000
-
-<html><body>bad</body></html>
-EOF
- my $out = '';
- my $err = '';
- run([$mda], \$in, \$out, \$err);
- isnt($?, 0, "mda exited with failure");
- is(length $out, 0, 'nothing in stdout');
- isnt(length $err, 0, 'error message in stderr');
-
- @new = glob("$faildir/new/*");
- is(scalar(@new), 0, "new message did not show up");
-
- # reject multipart again
- $in = $mime->as_string;
- $err = '';
- run([$mda], \$in, \$out, \$err);
- isnt($?, 0, "mda exited with failure");
- is(length $out, 0, 'nothing in stdout');
- isnt(length $err, 0, 'error message in stderr');
- @new = glob("$faildir/new/*");
- is(scalar(@new), 0, "new message did not show up");
-}
-
done_testing();
sub fail_bad_header {
my ($good_rev, $msg, $in) = @_;
- open my $fh, '>', $failbox or die "failed to open $failbox: $!\n";
- close $fh or die "failed to close $failbox: $!\n";
+ my @f = glob("$faildir/*/*");
+ unlink @f if @f;
my ($out, $err) = ("", "");
local $ENV{PATH} = $main_path;
run([$mda], \$in, \$out, \$err);
my $rev = `git --git-dir=$maindir rev-list HEAD`;
chomp $rev;
is($rev, $good_rev, "bad revision not commited ($msg)");
- ok(-s $failbox > 0, "PI_EMERGENCY is written to ($msg)");
+ @f = glob("$faildir/*/*");
+ is(scalar @f, 1, "faildir written to");
[ $in, $out, $err ];
}
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 6/9] mda: precheck no longer depends on Email::Filter
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (4 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 5/9] t/mda: use only Maildir for testing Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 7/9] filter: begin work on a new filter API Eric Wong
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
Email::Filter doesn't offer any functionality we need, here;
and our dependency on Email::Filter will gradually be removed
since it (and Email::LocalDelivery) seem abandoned and we
can have more-fine-grained control by rolling our own Maildir
delivery which can work transactionally.
---
lib/PublicInbox/MDA.pm | 11 +++++------
script/public-inbox-mda | 2 +-
t/precheck.t | 14 +++++---------
3 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/MDA.pm b/lib/PublicInbox/MDA.pm
index 2e6e9ec..0f583e6 100644
--- a/lib/PublicInbox/MDA.pm
+++ b/lib/PublicInbox/MDA.pm
@@ -32,18 +32,17 @@ sub __drop_plus {
# do not allow Bcc, only Cc and To if recipient is set
sub precheck {
- my ($klass, $filter, $address) = @_;
- my Email::Simple $simple = $filter->simple;
+ my ($klass, $simple, $address) = @_;
my @mid = $simple->header('Message-ID');
return 0 if scalar(@mid) != 1;
my $mid = $mid[0];
return 0 if (length($mid) > MAX_MID_SIZE);
return 0 unless usable_str(length('<m@h>'), $mid) && $mid =~ /\@/;
- return 0 unless usable_str(length('u@h'), $filter->from);
+ return 0 unless usable_str(length('u@h'), $simple->header("From"));
return 0 unless usable_str(length(':o'), $simple->header("Subject"));
return 0 unless usable_date($simple->header("Date"));
return 0 if length($simple->as_string) > MAX_SIZE;
- alias_specified($filter, $address);
+ alias_specified($simple, $address);
}
sub usable_str {
@@ -57,14 +56,14 @@ sub usable_date {
}
sub alias_specified {
- my ($filter, $address) = @_;
+ my ($simple, $address) = @_;
my @address = ref($address) eq 'ARRAY' ? @$address : ($address);
my %ok = map {
lc(__drop_plus($_)) => 1;
} @address;
- foreach my $line ($filter->cc, $filter->to) {
+ foreach my $line ($simple->header('Cc'), $simple->header('To')) {
my @addrs = ($line =~ /([^<\s]+\@[^>\s]+)/g);
foreach my $addr (@addrs) {
if ($ok{lc(__drop_plus($addr))}) {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 84219ac..ff2835d 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -34,7 +34,7 @@ defined $dst or exit(1);
my $main_repo = $dst->{mainrepo} or exit(1);
my $filtered; # string dest
-if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
+if (PublicInbox::MDA->precheck($filter->simple, $dst->{address}) &&
do_spamc($filter->simple, \$filtered)) {
# update our message with SA headers (in case our filter rejects it)
my $msg = Email::MIME->new(\$filtered);
diff --git a/t/precheck.t b/t/precheck.t
index 3f2c5d5..6c353d8 100644
--- a/t/precheck.t
+++ b/t/precheck.t
@@ -4,28 +4,25 @@ use strict;
use warnings;
use Test::More;
use Email::Simple;
-use Email::Filter;
use PublicInbox::MDA;
sub do_checks {
my ($s) = @_;
- my $f = Email::Filter->new(data => $s->as_string);
-
my $recipient = 'foo@example.com';
- ok(!PublicInbox::MDA->precheck($f, $recipient),
+ ok(!PublicInbox::MDA->precheck($s, $recipient),
"wrong ORIGINAL_RECIPIENT rejected");
$recipient = 'b@example.com';
- ok(PublicInbox::MDA->precheck($f, $recipient),
+ ok(PublicInbox::MDA->precheck($s, $recipient),
"ORIGINAL_RECIPIENT in To: is OK");
$recipient = 'c@example.com';
- ok(PublicInbox::MDA->precheck($f, $recipient),
+ ok(PublicInbox::MDA->precheck($s, $recipient),
"ORIGINAL_RECIPIENT in Cc: is OK");
$recipient = [ 'c@example.com', 'd@example.com' ];
- ok(PublicInbox::MDA->precheck($f, $recipient),
+ ok(PublicInbox::MDA->precheck($s, $recipient),
"alias list is OK");
}
@@ -72,8 +69,7 @@ sub do_checks {
],
body => "hello world\n",
);
- my $f = Email::Filter->new(data => $s->as_string);
- ok(!PublicInbox::MDA->precheck($f, $recipient),
+ ok(!PublicInbox::MDA->precheck($s, $recipient),
"missing From: is rejected");
}
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 7/9] filter: begin work on a new filter API
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (5 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 6/9] mda: precheck no longer depends on Email::Filter Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 8/9] emergency: implement new emergency Maildir delivery Eric Wong
2016-06-15 0:37 ` [PATCH 9/9] mda: hook up new filter functionality Eric Wong
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
This filter API should be independent of Email::Filter and
hopefully less intrusive to long running processes.
---
lib/PublicInbox/Filter/Base.pm | 100 +++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/Filter/Mirror.pm | 12 +++++
lib/PublicInbox/Filter/Vger.pm | 33 +++++++++++++
t/filter_base.t | 81 +++++++++++++++++++++++++++++++
t/filter_mirror.t | 40 ++++++++++++++++
t/filter_vger.t | 46 ++++++++++++++++++
6 files changed, 312 insertions(+)
create mode 100644 lib/PublicInbox/Filter/Base.pm
create mode 100644 lib/PublicInbox/Filter/Mirror.pm
create mode 100644 lib/PublicInbox/Filter/Vger.pm
create mode 100644 t/filter_base.t
create mode 100644 t/filter_mirror.t
create mode 100644 t/filter_vger.t
diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
new file mode 100644
index 0000000..0991e87
--- /dev/null
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -0,0 +1,100 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# base class for creating per-list or per-project filters
+package PublicInbox::Filter::Base;
+use strict;
+use warnings;
+use PublicInbox::MsgIter;
+use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
+
+my $NO_HTML = '*** We only accept plain-text mail, no HTML ***';
+our %DEFAULTS = (
+ reject_suffix => [ qw(exe bat cmd com pif scr vbs cpl zip) ],
+ reject_type => [ "text/html:$NO_HTML", "text/xhtml:$NO_HTML",
+ 'application/vnd.ms-*:No proprietary data formats' ],
+);
+our $INVALID_FN = qr/\0/;
+
+sub REJECT () { 100 }
+sub ACCEPT { scalar @_ > 1 ? $_[1] : 1 }
+sub IGNORE () { 0 }
+
+my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
+sub glob2pat {
+ my ($glob) = @_;
+ $glob =~ s!(.)!$patmap{$1} || "\Q$1"!ge;
+ $glob;
+}
+
+sub new {
+ my ($class, %opts) = @_;
+ my $self = bless { err => '', %opts }, $class;
+ foreach my $f (qw(reject_suffix reject_type)) {
+ # allow undef:
+ $self->{$f} = $DEFAULTS{$f} unless exists $self->{$f};
+ }
+ if (defined $self->{reject_suffix}) {
+ my $tmp = $self->{reject_suffix};
+ $tmp = join('|', map { glob2pat($_) } @$tmp);
+ $self->{reject_suffix} = qr/\.($tmp)\s*\z/i;
+ }
+ my $rt = [];
+ if (defined $self->{reject_type}) {
+ my $tmp = $self->{reject_type};
+ @$rt = map {
+ my ($type, $msg) = split(':', $_, 2);
+ $type = lc $type;
+ $msg ||= "Unacceptable Content-Type: $type";
+ my $re = glob2pat($type);
+ [ qr/\b$re\b/i, $msg ];
+ } @$tmp;
+ }
+ $self->{reject_type} = $rt;
+ $self;
+}
+
+sub reject ($$) {
+ my ($self, $reason) = @_;
+ $self->{err} = $reason;
+ REJECT;
+}
+
+sub err ($) { $_[0]->{err} }
+
+# for MDA
+sub delivery {
+ my ($self, $mime) = @_;
+
+ my $rt = $self->{reject_type};
+ my $reject_suffix = $self->{reject_suffix} || $INVALID_FN;
+ my (%sfx, %type);
+
+ msg_iter($mime, sub {
+ my ($part, $depth, @idx) = @{$_[0]};
+
+ my $ct = $part->content_type || 'text/plain';
+ foreach my $p (@$rt) {
+ if ($ct =~ $p->[0]) {
+ $type{$p->[1]} = 1;
+ }
+ }
+
+ my $fn = $part->filename;
+ if (defined($fn) && $fn =~ $reject_suffix) {
+ $sfx{$1} = 1;
+ }
+ });
+
+ my @r;
+ if (keys %type) {
+ push @r, sort keys %type;
+ }
+ if (keys %sfx) {
+ push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
+ }
+
+ @r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+}
+
+1;
diff --git a/lib/PublicInbox/Filter/Mirror.pm b/lib/PublicInbox/Filter/Mirror.pm
new file mode 100644
index 0000000..d994088
--- /dev/null
+++ b/lib/PublicInbox/Filter/Mirror.pm
@@ -0,0 +1,12 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Dumb filter for blindly accepting everything
+package PublicInbox::Filter::Mirror;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+sub delivery { $_[0]->ACCEPT };
+
+1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
new file mode 100644
index 0000000..9498081
--- /dev/null
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for vger.kernel.org list trailer
+package PublicInbox::Filter::Vger;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l0 = qr/-+/; # older messages only had one '-'
+my $l1 =
+ qr/To unsubscribe from this list: send the line "unsubscribe [\w-]+" in/;
+my $l2 = qr/the body of a message to majordomo\@vger\.kernel\.org/;
+my $l3 =
+ qr!More majordomo info at +http://vger\.kernel\.org/majordomo-info\.html!;
+
+# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
+my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
+
+sub delivery {
+ my ($self, $mime) = @_;
+ my $s = $mime->as_string;
+
+ # the vger appender seems to only work on the raw string,
+ # so in multipart (e.g. GPG-signed) messages, the list trailer
+ # becomes invisible to MIME-aware email clients.
+ if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
+ $mime = Email::MIME->new(\$s);
+ }
+ $self->ACCEPT($mime);
+}
+
+1;
diff --git a/t/filter_base.t b/t/filter_base.t
new file mode 100644
index 0000000..ee5c730
--- /dev/null
+++ b/t/filter_base.t
@@ -0,0 +1,81 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Base';
+
+{
+ my $f = PublicInbox::Filter::Base->new;
+ ok($f, 'created stock object');
+ ok(defined $f->{reject_suffix}, 'rejected suffix redefined');
+ is(ref($f->{reject_suffix}), 'Regexp', 'reject_suffix should be a RE');
+}
+
+{
+ my $f = PublicInbox::Filter::Base->new(reject_suffix => undef);
+ ok($f, 'created base object q/o reject_suffix');
+ ok(!defined $f->{reject_suffix}, 'reject_suffix not defined');
+}
+
+{
+ my $f = PublicInbox::Filter::Base->new;
+ my $html_body = "<html><body>hi</body></html>";
+ my $parts = [
+ Email::MIME->create(
+ attributes => {
+ content_type => 'text/xhtml; charset=UTF-8',
+ encoding => 'base64',
+ },
+ body => $html_body,
+ ),
+ Email::MIME->create(
+ attributes => {
+ content_type => 'text/plain',
+ encoding => 'quoted-printable',
+ },
+ body => 'hi = "bye"',
+ )
+ ];
+ my $email = Email::MIME->create(
+ header_str => [
+ From => 'a@example.com',
+ Subject => 'blah',
+ 'Content-Type' => 'multipart/alternative'
+ ],
+ parts => $parts,
+ );
+ is($f->delivery($email), 100, "xhtml rejected");
+}
+
+{
+ my $f = PublicInbox::Filter::Base->new;
+ my $parts = [
+ Email::MIME->create(
+ attributes => {
+ content_type => 'application/vnd.ms-excel',
+ encoding => 'base64',
+ },
+ body => 'junk',
+ ),
+ Email::MIME->create(
+ attributes => {
+ content_type => 'text/plain',
+ encoding => 'quoted-printable',
+ },
+ body => 'junk',
+ )
+ ];
+ my $email = Email::MIME->create(
+ header_str => [
+ From => 'a@example.com',
+ Subject => 'blah',
+ 'Content-Type' => 'multipart/mixed'
+ ],
+ parts => $parts,
+ );
+ is($f->delivery($email), 100, 'proprietary format rejected on glob');
+}
+
+done_testing();
diff --git a/t/filter_mirror.t b/t/filter_mirror.t
new file mode 100644
index 0000000..01be282
--- /dev/null
+++ b/t/filter_mirror.t
@@ -0,0 +1,40 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Mirror';
+
+my $f = PublicInbox::Filter::Mirror->new;
+ok($f, 'created PublicInbox::Filter::Mirror object');
+{
+ my $html_body = "<html><body>hi</body></html>";
+ my $parts = [
+ Email::MIME->create(
+ attributes => {
+ content_type => 'text/html; charset=UTF-8',
+ encoding => 'base64',
+ },
+ body => $html_body,
+ ),
+ Email::MIME->create(
+ attributes => {
+ content_type => 'text/plain',
+ encoding => 'quoted-printable',
+ },
+ body => 'hi = "bye"',
+ )
+ ];
+ my $email = Email::MIME->create(
+ header_str => [
+ From => 'a@example.com',
+ Subject => 'blah',
+ 'Content-Type' => 'multipart/alternative'
+ ],
+ parts => $parts,
+ );
+ is($f->ACCEPT, $f->delivery($email), 'accept any trash that comes');
+}
+
+done_testing();
diff --git a/t/filter_vger.t b/t/filter_vger.t
new file mode 100644
index 0000000..83a4c9e
--- /dev/null
+++ b/t/filter_vger.t
@@ -0,0 +1,46 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use_ok 'PublicInbox::Filter::Vger';
+
+my $f = PublicInbox::Filter::Vger->new;
+ok($f, 'created PublicInbox::Filter::Vger object');
+{
+ my $lkml = <<'EOF';
+From: foo@example.com
+Subject: test
+
+keep this
+--
+To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at http://vger.kernel.org/majordomo-info.html
+Please read the FAQ at http://www.tux.org/lkml/
+EOF
+
+ my $mime = Email::MIME->new($lkml);
+ $mime = $f->delivery($mime);
+ is("keep this\n", $mime->body, 'normal message filtered OK');
+}
+
+{
+ my $no_nl = <<'EOF';
+From: foo@example.com
+Subject: test
+
+OSX users :P--
+To unsubscribe from this list: send the line "unsubscribe git" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at http://vger.kernel.org/majordomo-info.html
+EOF
+
+ my $mime = Email::MIME->new($no_nl);
+ $mime = $f->delivery($mime);
+ is('OSX users :P', $mime->body, 'missing trailing LF in original OK');
+}
+
+
+done_testing();
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 8/9] emergency: implement new emergency Maildir delivery
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (6 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 7/9] filter: begin work on a new filter API Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
2016-06-15 0:37 ` [PATCH 9/9] mda: hook up new filter functionality Eric Wong
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
This is transactional and hopefully safer in case we hit SIGSEGV
or SIGKILL during processing, as the tmp/ copy will remain on
the FS even if DESTROY/END handlers are not called.
---
lib/PublicInbox/Emergency.pm | 96 ++++++++++++++++++++++++++++++++++++++++++++
t/emergency.t | 53 ++++++++++++++++++++++++
2 files changed, 149 insertions(+)
create mode 100644 lib/PublicInbox/Emergency.pm
create mode 100644 t/emergency.t
diff --git a/lib/PublicInbox/Emergency.pm b/lib/PublicInbox/Emergency.pm
new file mode 100644
index 0000000..e402d30
--- /dev/null
+++ b/lib/PublicInbox/Emergency.pm
@@ -0,0 +1,96 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Emergency Maildir delivery for MDA
+package PublicInbox::Emergency;
+use strict;
+use warnings;
+use Fcntl qw(:DEFAULT SEEK_SET);
+use Sys::Hostname qw(hostname);
+use IO::Handle;
+
+sub new {
+ my ($class, $dir) = @_;
+
+ foreach (qw(new tmp cur)) {
+ my $d = "$dir/$_";
+ next if -d $d;
+ require File::Path;
+ File::Path::mkpath($d); # croaks on fatal errors
+ }
+ bless { dir => $dir, files => {}, t => 0, cnt => 0 }, $class;
+}
+
+sub _fn_in {
+ my ($self, $dir) = @_;
+ my @host = split(/\./, hostname);
+ my $now = time;
+ if ($self->{t} != $now) {
+ $self->{t} = $now;
+ $self->{cnt} = 0;
+ } else {
+ $self->{cnt}++;
+ }
+
+ my $f;
+ do {
+ $f = "$self->{dir}/$dir/$self->{t}.$$"."_$self->{cnt}.$host[0]";
+ $self->{cnt}++;
+ } while (-e $f);
+ $f;
+}
+
+sub prepare {
+ my ($self, $strref) = @_;
+
+ die "already in transaction: $self->{tmp}" if $self->{tmp};
+ my ($tmp, $fh);
+ do {
+ $tmp = _fn_in($self, 'tmp');
+ $! = undef;
+ } while (!sysopen($fh, $tmp, O_CREAT|O_EXCL|O_RDWR) && $!{EEXIST});
+ print $fh $$strref or die "write failed: $!";
+ $fh->flush or die "flush failed: $!";
+ $fh->autoflush(1);
+ $self->{fh} = $fh;
+ $self->{tmp} = $tmp;
+}
+
+sub abort {
+ my ($self) = @_;
+ delete $self->{fh};
+ my $tmp = delete $self->{tmp} or return;
+
+ unlink($tmp) or warn "Failed to unlink $tmp: $!";
+ undef;
+}
+
+sub fh {
+ my ($self) = @_;
+ my $fh = $self->{fh} or die "{fh} not open!\n";
+ seek($fh, 0, SEEK_SET) or die "seek(fh) failed: $!";
+ sysseek($fh, 0, SEEK_SET) or die "sysseek(fh) failed: $!";
+ $fh;
+}
+
+sub commit {
+ my ($self) = @_;
+
+ delete $self->{fh};
+ my $tmp = delete $self->{tmp} or return;
+ my $new;
+ do {
+ $new = _fn_in($self, 'new');
+ } while (!link($tmp, $new) && $!{EEXIST});
+ my @sn = stat($new) or die "stat $new failed: $!";
+ my @st = stat($tmp) or die "stat $tmp failed: $!";
+ if ($st[0] == $sn[0] && $st[1] == $sn[1]) {
+ unlink($tmp) or warn "Failed to unlink $tmp: $!";
+ } else {
+ warn "stat($new) and stat($tmp) differ";
+ }
+}
+
+sub DESTROY { commit($_[0]) }
+
+1;
diff --git a/t/emergency.t b/t/emergency.t
new file mode 100644
index 0000000..e480338
--- /dev/null
+++ b/t/emergency.t
@@ -0,0 +1,53 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+my $tmpdir = tempdir('emergency-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+use_ok 'PublicInbox::Emergency';
+
+{
+ my $md = "$tmpdir/a";
+ my $em = PublicInbox::Emergency->new($md);
+ ok(-d $md, 'Maildir a auto-created');
+ my @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 0, 'no temporary files exist, yet');
+ $em->prepare(\"BLAH");
+ @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 1, 'globbed one temporary file');
+ open my $fh, '<', $tmp[0] or die "failed to open: $!";
+ is("BLAH", <$fh>, 'wrote contents to temporary location');
+ my @new = <$md/new/*>;
+ is(scalar @new, 0, 'no new files exist, yet');
+ $em = undef;
+ @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 0, 'temporary file no longer exists');
+ @new = <$md/new/*>;
+ is(scalar @new, 1, 'globbed one new file');
+ open $fh, '<', $new[0] or die "failed to open: $!";
+ is("BLAH", <$fh>, 'wrote contents to new location');
+}
+{
+ my $md = "$tmpdir/b";
+ my $em = PublicInbox::Emergency->new($md);
+ ok(-d $md, 'Maildir b auto-created');
+ my @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 0, 'no temporary files exist, yet');
+ $em->prepare(\"BLAH");
+ @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 1, 'globbed one temporary file');
+ open my $fh, '<', $tmp[0] or die "failed to open: $!";
+ is("BLAH", <$fh>, 'wrote contents to temporary location');
+ my @new = <$md/new/*>;
+ is(scalar @new, 0, 'no new files exist, yet');
+ is(sysread($em->fh, my $buf, 9), 4, 'read file handle exposed');
+ is($buf, 'BLAH', 'got expected data');
+ $em->abort;
+ @tmp = <$md/tmp/*>;
+ is(scalar @tmp, 0, 'temporary file no longer exists');
+ @new = <$md/new/*>;
+ is(scalar @new , 0, 'new file no longer exists');
+}
+
+done_testing();
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 9/9] mda: hook up new filter functionality
2016-06-15 0:37 [PATCH 0/9] big mda filter changes Eric Wong
` (7 preceding siblings ...)
2016-06-15 0:37 ` [PATCH 8/9] emergency: implement new emergency Maildir delivery Eric Wong
@ 2016-06-15 0:37 ` Eric Wong
8 siblings, 0 replies; 10+ messages in thread
From: Eric Wong @ 2016-06-15 0:37 UTC (permalink / raw)
To: meta
This removes the Email::Filter dependency as well as the
signature-breaking scrubber code. We now prefer to
reject unacceptable messages and grudgingly (and blindly)
mirror messages we're not the primary endpoint for.
---
INSTALL | 2 -
Makefile.PL | 1 -
lib/PublicInbox/Filter.pm | 232 -------------------------------
script/public-inbox-learn | 6 +-
script/public-inbox-mda | 131 +++++++++---------
t/filter.t | 337 ----------------------------------------------
6 files changed, 69 insertions(+), 640 deletions(-)
delete mode 100644 lib/PublicInbox/Filter.pm
delete mode 100644 t/filter.t
diff --git a/INSTALL b/INSTALL
index 25cc3c9..03b356a 100644
--- a/INSTALL
+++ b/INSTALL
@@ -25,10 +25,8 @@ Requirements (server MDA)
* git
* SpamAssassin (spamc/spamd)
* MTA - postfix is recommended
-* lynx (for converting HTML messages to text)
* Perl and several modules: (Debian package name)
- Date::Parse libtimedate-perl
- - Email::Filter libemail-filter-perl
- Email::MIME libemail-mime-perl
- Email::MIME::ContentType libemail-mime-contenttype-perl
- Encode::MIME::Header perl
diff --git a/Makefile.PL b/Makefile.PL
index 4f25312..0cba59d 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -18,7 +18,6 @@ WriteMakefile(
# We also depend on git.
# Keep this sorted and synced to the INSTALL document
'Date::Parse' => 0,
- 'Email::Filter' => 0,
'Email::MIME' => 0,
'Email::MIME::ContentType' => 0,
'Email::Simple' => 0,
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
deleted file mode 100644
index 8b78a44..0000000
--- a/lib/PublicInbox/Filter.pm
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-#
-# Used to filter incoming mail for -mda and importers
-# This only exposes one function: run
-# Note: the settings here are highly opinionated. Obviously, this is
-# Free Software (AGPLv3), so you may change it if you host yourself.
-package PublicInbox::Filter;
-use strict;
-use warnings;
-use Email::MIME;
-use Email::MIME::ContentType qw/parse_content_type/;
-use Email::Filter;
-use IPC::Run;
-our $VERSION = '0.0.1';
-use constant NO_HTML => '*** We only accept plain-text email, no HTML ***';
-use constant TEXT_ONLY => '*** We only accept plain-text email ***';
-
-# start with the same defaults as mailman
-our $BAD_EXT = qr/\.(exe|bat|cmd|com|pif|scr|vbs|cpl|zip)\s*\z/i;
-our $MIME_HTML = qr!\btext/x?html\b!i;
-our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
-
-# this is highly opinionated delivery
-# returns 0 only if there is nothing to deliver
-sub run {
- my ($class, $mime, $filter) = @_;
-
- my $content_type = $mime->header('Content-Type') || 'text/plain';
-
- if ($content_type =~ m!\btext/plain\b!i) {
- return 1; # yay, nothing to do
- } elsif ($content_type =~ $MIME_HTML) {
- $filter->reject(NO_HTML) if $filter;
- # HTML-only, non-multipart
- my $body = $mime->body;
- my $ct_parsed = parse_content_type($content_type);
- dump_html(\$body, $ct_parsed->{attributes}->{charset});
- replace_body($mime, $body);
- return 1;
- } elsif ($content_type =~ m!\bmultipart/!i) {
- return strip_multipart($mime, $content_type, $filter);
- } else {
- $filter->reject(TEXT_ONLY) if $filter;
- replace_body($mime, "$content_type message scrubbed");
- return 0;
- }
-}
-
-sub replace_part {
- my ($mime, $part, $type) = ($_[0], $_[1], $_[3]);
- # don't copy $_[2], that's the body (it may be huge)
-
- # Email::MIME insists on setting Date:, so just set it consistently
- # to avoid conflicts to avoid git merge conflicts in a split brain
- # situation.
- unless (defined $part->header('Date')) {
- my $date = $mime->header('Date') ||
- 'Thu, 01 Jan 1970 00:00:00 +0000';
- $part->header_set('Date', $date);
- }
-
- $part->charset_set(undef);
- $part->name_set(undef);
- $part->filename_set(undef);
- $part->format_set(undef);
- $part->encoding_set('8bit');
- $part->disposition_set(undef);
- $part->content_type_set($type);
- $part->body_set($_[2]);
-}
-
-# converts one part of a multipart message to text
-sub html_part_to_text {
- my ($mime, $part) = @_;
- my $body = $part->body;
- my $ct_parsed = parse_content_type($part->content_type);
- dump_html(\$body, $ct_parsed->{attributes}->{charset});
- replace_part($mime, $part, $body, 'text/plain');
-}
-
-# modifies $_[0] in place
-sub dump_html {
- my ($body, $charset) = @_;
- $charset ||= 'US-ASCII';
- my @cmd = qw(lynx -stdin -stderr -dump);
- my $out = "";
- my $err = "";
-
- # be careful about remote command injection!
- if ($charset =~ /\A([A-Za-z0-9\-]+)\z/) {
- push @cmd, "-assume_charset=$charset";
- }
- if (IPC::Run::run(\@cmd, $body, \$out, \$err)) {
- $out =~ s/\r\n/\n/sg;
- $$body = $out;
- } else {
- # give them an ugly version:
- $$body = "public-inbox HTML conversion failed: $err\n" .
- $$body . "\n";
- }
-}
-
-# this is to correct old archives during import.
-sub strip_multipart {
- my ($mime, $content_type, $filter) = @_;
-
- my (@html, @keep);
- my $rejected = 0;
- my $ok = 1;
-
- # scan through all parts once
- $mime->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
-
- # some extensions are just bad, reject them outright
- my $fn = $part->filename;
- if (defined($fn) && $fn =~ $BAD_EXT) {
- $filter->reject("Bad file type: $1") if $filter;
- $rejected++;
- return;
- }
-
- my $part_type = $part->content_type || '';
- if ($part_type =~ m!\btext/plain\b!i) {
- push @keep, $part;
- } elsif ($part_type =~ $MIME_HTML) {
- $filter->reject(NO_HTML) if $filter;
- push @html, $part;
- } elsif ($part_type =~ $MIME_TEXT_ANY) {
- # Give other text attachments the benefit of the doubt,
- # here? Could be source code or script the user wants
- # help with.
-
- push @keep, $part;
- } elsif ($part_type eq '' ||
- $part_type =~ m!\bapplication/octet-stream\b!i) {
- # unfortunately, some mailers don't set correct types,
- # let messages of unknown type through but do not
- # change the sender-specified type
- if (recheck_type_ok($part)) {
- push @keep, $part;
- } elsif ($filter) {
- $filter->reject("Bad attachment: $part_type ".
- TEXT_ONLY);
- } else {
- $rejected++;
- }
- } elsif ($part_type =~ m!\bapplication/pgp-signature\b!i) {
- # PGP signatures are not huge, we may keep them.
- # They can only be valid if it's the last element,
- # so we keep them iff the message is unmodified:
- if ($rejected == 0 && !@html) {
- push @keep, $part;
- }
- } elsif ($filter) {
- $filter->reject("unacceptable mime-type: $part_type ".
- TEXT_ONLY);
- } else {
- # reject everything else, including non-PGP signatures
- $rejected++;
- }
- });
-
- if ($content_type =~ m!\bmultipart/alternative\b!i) {
- if (scalar @keep == 1) {
- return collapse($mime, $keep[0]);
- }
- } else { # convert HTML parts to plain text
- foreach my $part (@html) {
- html_part_to_text($mime, $part);
- push @keep, $part;
- }
- }
-
- if (@keep == 0) {
- @keep = (Email::MIME->create(
- attributes => {
- content_type => 'text/plain',
- charset => 'US-ASCII',
- encoding => '8bit',
- },
- body_str => 'all attachments scrubbed by '. __PACKAGE__
- ));
- $ok = 0;
- }
- if (scalar(@html) || $rejected) {
- $mime->parts_set(\@keep);
- $mime->body_set($mime->body_raw);
- mark_changed($mime);
- } # else: no changes
-
- return $ok;
-}
-
-sub mark_changed {
- my ($mime) = @_;
- $mime->header_set('X-Content-Filtered-By', __PACKAGE__ ." $VERSION");
-}
-
-sub collapse {
- my ($mime, $part) = @_;
- $mime->header_set('Content-Type', $part->content_type);
- $mime->body_set($part->body_raw);
- my $cte = $part->header('Content-Transfer-Encoding');
- if (defined($cte) && $cte ne '') {
- $mime->header_set('Content-Transfer-Encoding', $cte);
- }
- mark_changed($mime);
- return 1;
-}
-
-sub replace_body {
- my $mime = $_[0];
- $mime->body_set($_[1]);
- $mime->header_set('Content-Type', 'text/plain');
- if ($mime->header('Content-Transfer-Encoding')) {
- $mime->header_set('Content-Transfer-Encoding', undef);
- }
- mark_changed($mime);
-}
-
-# Check for display-able text, no messed up binaries
-# Note: we can not rewrite the message with the detected mime type
-sub recheck_type_ok {
- my ($part) = @_;
- my $s = $part->body;
- ((length($s) < 0x10000) && ($s =~ /\A([[:print:]\s]+)\z/s));
-}
-
-1;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 783cf03..817fd5e 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -55,11 +55,7 @@ foreach my $h (qw(Cc To)) {
}
}
-if ($train eq "ham") {
- require PublicInbox::MDA;
- require PublicInbox::Filter;
- PublicInbox::Filter->run($mime);
-}
+require PublicInbox::MDA if $train eq "ham";
# n.b. message may be cross-posted to multiple public-inboxes
foreach my $recipient (keys %dests) {
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index ff2835d..63096fe 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -6,97 +6,102 @@
use strict;
use warnings;
my $usage = 'public-inbox-mda < rfc2822_message';
+my ($ems, $emm);
-use Email::Filter;
+sub do_exit {
+ my ($code) = shift;
+ $emm = $ems = undef; # trigger DESTROY
+ exit $code;
+}
+
+use Email::Simple;
use Email::MIME;
use Email::MIME::ContentType;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
-use IPC::Run qw(run);
use PublicInbox::MDA;
-use PublicInbox::Filter;
use PublicInbox::Config;
use PublicInbox::Import;
use PublicInbox::Git;
+use PublicInbox::Emergency;
+use PublicInbox::Filter::Base;
+use PublicInbox::Spawn qw(popen_rd);
# n.b: hopefully we can setup the emergency path without bailing due to
# user error, we really want to setup the emergency destination ASAP
# in case there's bugs in our code or user error.
my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/";
-
-# this reads the message from stdin
-my $filter = Email::Filter->new(emergency => $emergency);
+$ems = PublicInbox::Emergency->new($emergency);
+my $str = eval { local $/; <STDIN> };
+$ems->prepare(\$str);
+my $simple = Email::Simple->new(\$str);
my $config = PublicInbox::Config->new;
my $recipient = $ENV{ORIGINAL_RECIPIENT};
defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n";
my $dst = $config->lookup($recipient); # first check
-defined $dst or exit(1);
-my $main_repo = $dst->{mainrepo} or exit(1);
-my $filtered; # string dest
+defined $dst or do_exit(1);
+my $main_repo = $dst->{mainrepo} or do_exit(1);
+
+# pre-check, MDA has stricter rules than an importer might;
+do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address});
-if (PublicInbox::MDA->precheck($filter->simple, $dst->{address}) &&
- do_spamc($filter->simple, \$filtered)) {
- # update our message with SA headers (in case our filter rejects it)
- my $msg = Email::MIME->new(\$filtered);
- $filtered = undef;
- $filter->simple($msg);
+$str = '';
+my $spam_ok = do_spamc($ems->fh, \$str);
+$simple = undef;
+$emm = PublicInbox::Emergency->new($emergency);
+$emm->prepare(\$str);
+$ems = $ems->abort;
+my $mime = Email::MIME->new(\$str);
+$str = '';
+do_exit(0) unless $spam_ok;
- my $filter_arg;
- my $fcfg = $dst->{filter};
- if (!defined $fcfg || $filter eq 'reject') {
- $filter_arg = $filter;
- } elsif ($fcfg eq 'scrub') {
- $filter_arg = undef; # the default for legacy versions
- } else {
- warn "publicinbox.$dst->{name}.filter=$fcfg invalid\n";
- warn "must be either 'scrub' or 'reject' (the default)\n";
- }
+my $fcfg = $dst->{filter} || '';
+my $filter;
+if ($fcfg eq 'scrub') { # TODO:
+ require PublicInbox::Filter::Mirror;
+ $filter = PublicInbox::Filter::Mirror->new;
+} else {
+ $filter = PublicInbox::Filter::Base->new;
+}
- if (PublicInbox::Filter->run($msg, $filter_arg)) {
- # run spamc again on the HTML-free message
- if (do_spamc($msg, \$filtered)) {
- $msg = Email::MIME->new(\$filtered);
- PublicInbox::MDA->set_list_headers($msg, $dst);
- $filter->simple($msg);
+my $ret = $filter->delivery($mime);
+if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
+ $mime = $ret;
+} elsif ($ret == PublicInbox::Filter::Base::IGNORE) {
+ do_exit(0); # chuck it to emergency
+} elsif ($ret == PublicInbox::Filter::Base::REJECT) {
+ $! = $ret;
+ die $filter->err, "\n";
+} # else { accept
- END {
- index_sync($main_repo) if ($? == 0);
- };
- my $git = PublicInbox::Git->new($main_repo);
- my $im = PublicInbox::Import->new($git,
- $dst->{name}, $recipient);
- if (defined $im->add($msg)) {
- $im->done;
- $filter->ignore; # exits
- }
- # this message is similar to what ssoma-mda shows:
- print STDERR "CONFLICT: Message-ID: ",
- $msg->header_obj->header_raw('Message-ID'),
- " exists\n";
- }
- }
+PublicInbox::MDA->set_list_headers($mime, $dst);
+END { index_sync($main_repo) if $? == 0 };
+my $git = PublicInbox::Git->new($main_repo);
+my $im = PublicInbox::Import->new($git, $dst->{name}, $recipient);
+if (defined $im->add($mime)) {
+ $im->done;
+ $emm = $emm->abort;
} else {
- # Ensure emergency spam gets spamassassin headers.
- # This makes it easier to prioritize obvious spam from less obvious
- if (defined($filtered) && $filtered ne '') {
- my $drop = Email::MIME->new(\$filtered);
- $filtered = undef;
- $filter->simple($drop);
- }
+ # this message is similar to what ssoma-mda shows:
+ print STDERR "CONFLICT: Message-ID: ",
+ $mime->header_obj->header_raw('Message-ID'),
+ " exists\n";
}
-exit 0; # goes to emergency
+do_exit(0);
# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
-# not using Email::Filter->pipe here since we want the stdout of
-# the command even on failure (spamc will set $? on error).
sub do_spamc {
- my ($msg, $out) = @_;
- eval {
- my $orig = $msg->as_string;
- run([qw/spamc -E --headers/], \$orig, $out);
- };
+ my ($in, $out) = @_;
+ my $rdr = { 0 => fileno($in) };
+ my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr);
+ my $r;
+ do {
+ $r = sysread($fh, $$out, 65536, length($$out));
+ } while (defined($r) && $r != 0);
+ close $fh or die "close failed: $!\n";
+ waitpid($pid, 0);
- return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
+ ($? || $$out eq '') ? 0 : 1;
}
sub index_sync {
diff --git a/t/filter.t b/t/filter.t
deleted file mode 100644
index 609a192..0000000
--- a/t/filter.t
+++ /dev/null
@@ -1,337 +0,0 @@
-# Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
-# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
-use strict;
-use warnings;
-use Test::More;
-use Email::MIME;
-use PublicInbox::Filter;
-
-sub count_body_parts {
- my ($bodies, $part) = @_;
- my $body = $part->body_raw;
- $body =~ s/\A\s*//;
- $body =~ s/\s*\z//;
- $bodies->{$body} ||= 0;
- $bodies->{$body}++;
-}
-
-# multipart/alternative: HTML and quoted-printable, keep the plain-text
-{
- my $html_body = "<html><body>hi</body></html>";
- my $parts = [
- Email::MIME->create(
- attributes => {
- content_type => 'text/html; charset=UTF-8',
- encoding => 'base64',
- },
- body => $html_body,
- ),
- Email::MIME->create(
- attributes => {
- content_type => 'text/plain',
- encoding => 'quoted-printable',
- },
- body => 'hi = "bye"',
- )
- ];
- my $email = Email::MIME->create(
- header_str => [
- From => 'a@example.com',
- Subject => 'blah',
- 'Content-Type' => 'multipart/alternative'
- ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is("text/plain", $parsed->header("Content-Type"));
- is(scalar $parsed->parts, 1, "HTML part removed");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 1, "one bodies");
- is($bodies{"hi =3D \"bye\"="}, 1, "QP text part unchanged");
- $parsed->walk_parts(sub {
- my ($part) = @_;
- my $b = $part->body;
- $b =~ s/\s*\z//;
- is($b, "hi = \"bye\"", "decoded body matches");
- });
-}
-
-# plain-text email is passed through unchanged
-{
- my $s = Email::MIME->create(
- header => [
- From => 'a@example.com',
- To => 'b@example.com',
- 'Content-Type' => 'text/plain',
- Subject => 'this is a subject',
- ],
- body => "hello world\n",
- );
- is(1, PublicInbox::Filter->run($s), "run was a success");
-}
-
-# convert single-part HTML to plain-text
-{
- my $s = Email::MIME->create(
- header => [
- From => 'a@example.com',
- To => 'b@example.com',
- 'Content-Type' => 'text/html',
- Subject => 'HTML only badness',
- ],
- body => "<html><body>bad body\r\n</body></html>\n",
- );
- is(1, PublicInbox::Filter->run($s), "run was a success");
- unlike($s->as_string, qr/<html>/, "HTML removed");
- is("text/plain", $s->header("Content-Type"),
- "content-type changed");
- like($s->body, qr/\A\s*bad body\s*\z/, "body");
- unlike($s->body, qr/\r/, "body has no cr");
- like($s->header("X-Content-Filtered-By"),
- qr/PublicInbox::Filter/, "XCFB header added");
-}
-
-# multipart/alternative: HTML and plain-text, keep the plain-text
-{
- my $html_body = "<html><body>hi</body></html>";
- my $parts = [
- Email::MIME->create(
- attributes => {
- content_type => 'text/html; charset=UTF-8',
- encoding => 'base64',
- },
- body => $html_body,
- ),
- Email::MIME->create(
- attributes => {
- content_type => 'text/plain',
- },
- body=> 'hi',
- )
- ];
- my $email = Email::MIME->create(
- header_str => [
- From => 'a@example.com',
- Subject => 'blah',
- 'Content-Type' => 'multipart/alternative'
- ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is("text/plain", $parsed->header("Content-Type"));
- is(scalar $parsed->parts, 1, "HTML part removed");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 1, "one bodies");
- is($bodies{"hi"}, 1, "plain text part unchanged");
-}
-
-# multi-part plain-text-only
-{
- my $parts = [
- Email::MIME->create(
- attributes => { content_type => 'text/plain', },
- body => 'hi',
- ),
- Email::MIME->create(
- attributes => { content_type => 'text/plain', },
- body => 'bye',
- )
- ];
- my $email = Email::MIME->create(
- header_str => [ From => 'a@example.com', Subject => 'blah' ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is(scalar $parsed->parts, 2, "still 2 parts");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 2, "two bodies");
- is($bodies{"bye"}, 1, "bye part exists");
- is($bodies{"hi"}, 1, "hi part exists");
- is($parsed->header("X-Content-Filtered-By"), undef,
- "XCFB header unset");
-}
-
-# multi-part HTML, several HTML parts
-{
- my $parts = [
- Email::MIME->create(
- attributes => {
- content_type => 'text/html',
- encoding => 'base64',
- },
- body => '<html><body>b64 body</body></html>',
- ),
- Email::MIME->create(
- attributes => {
- content_type => 'text/html',
- encoding => 'quoted-printable',
- },
- body => '<html><body>qp body</body></html>',
- )
- ];
- my $email = Email::MIME->create(
- header_str => [ From => 'a@example.com', Subject => 'blah' ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is(scalar $parsed->parts, 2, "still 2 parts");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 2, "two body parts");
- is($bodies{"b64 body"}, 1, "base64 part converted");
- is($bodies{"qp body"}, 1, "qp part converted");
- like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
- "XCFB header added");
-}
-
-# plain-text with image attachments, kill images
-{
- my $parts = [
- Email::MIME->create(
- attributes => { content_type => 'text/plain' },
- body => 'see image',
- ),
- Email::MIME->create(
- attributes => {
- content_type => 'image/jpeg',
- filename => 'scary.jpg',
- encoding => 'base64',
- },
- body => 'bad',
- )
- ];
- my $email = Email::MIME->create(
- header_str => [ From => 'a@example.com', Subject => 'blah' ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is(scalar $parsed->parts, 1, "image part removed");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 1, "one body");
- is($bodies{'see image'}, 1, 'original body exists');
- like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
- "XCFB header added");
-}
-
-# all bad
-{
- my $parts = [
- Email::MIME->create(
- attributes => {
- content_type => 'image/jpeg',
- filename => 'scary.jpg',
- encoding => 'base64',
- },
- body => 'bad',
- ),
- Email::MIME->create(
- attributes => {
- content_type => 'text/plain',
- filename => 'scary.exe',
- encoding => 'base64',
- },
- body => 'bad',
- ),
- ];
- my $email = Email::MIME->create(
- header_str => [ From => 'a@example.com', Subject => 'blah' ],
- parts => $parts,
- );
- is(0, PublicInbox::Filter->run($email),
- "run signaled to stop delivery");
- my $parsed = Email::MIME->new($email->as_string);
- is(scalar $parsed->parts, 1, "bad parts removed");
- my %bodies;
- $parsed->walk_parts(sub {
- my ($part) = @_;
- return if $part->subparts; # walk_parts already recurses
- count_body_parts(\%bodies, $part);
- });
- is(scalar keys %bodies, 1, "one body");
- is($bodies{"all attachments scrubbed by PublicInbox::Filter"}, 1,
- "attachment scrubber left its mark");
- like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/,
- "XCFB header added");
-}
-
-{
- my $s = Email::MIME->create(
- header => [
- From => 'a@example.com',
- To => 'b@example.com',
- 'Content-Type' => 'test/pain',
- Subject => 'this is a subject',
- ],
- body => "hello world\n",
- );
- is(0, PublicInbox::Filter->run($s), "run was a failure");
- like($s->as_string, qr/scrubbed/, "scrubbed message");
-}
-
-# multi-part with application/octet-stream
-{
- my $os = 'application/octet-stream';
- my $parts = [
- Email::MIME->create(
- attributes => { content_type => $os },
- body => <<EOF
-#include <stdio.h>
-int main(void)
-{
- printf("Hello world\\n");
- return 0;
-}
-\f
-/* some folks like ^L */
-EOF
- ),
- Email::MIME->create(
- attributes => {
- filename => 'zero.data',
- encoding => 'base64',
- content_type => $os,
- },
- body => ("\0" x 4096),
- )
- ];
- my $email = Email::MIME->create(
- header_str => [ From => 'a@example.com', Subject => 'blah' ],
- parts => $parts,
- );
- is(1, PublicInbox::Filter->run($email), "run was a success");
- my $parsed = Email::MIME->new($email->as_string);
- is(scalar $parsed->parts, 1, "only one remaining part");
- like($parsed->header("X-Content-Filtered-By"),
- qr/PublicInbox::Filter/, "XCFB header added");
-}
-
-done_testing();
^ permalink raw reply related [flat|nested] 10+ messages in thread