* [PATCH] reject HTML loudly and automatically
@ 2015-07-14 21:02 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-07-14 21:02 UTC (permalink / raw)
To: meta
This should hopefully reduce the delay between when a user fails
to send plain-text and when an admin such as myself notices the
HTML mail in a sea of spam.
Unfortunately, this can lead to backscatter, so avoid doing it
until it has passed through spamc, at least.
---
lib/PublicInbox/Filter.pm | 11 +++++++----
public-inbox-mda | 2 +-
t/mda.t | 35 ++++++++++++++++++++++++++++++++++-
3 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 6862bb6..49ba5cb 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -12,6 +12,7 @@ use Email::MIME::ContentType qw/parse_content_type/;
use Email::Filter;
use IPC::Run;
our $VERSION = '0.0.1';
+use constant NO_HTML => '*** We only accept plain-text email, no HTML ***';
# start with the same defaults as mailman
our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i;
@@ -21,7 +22,7 @@ our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
# this is highly opinionated delivery
# returns 0 only if there is nothing to deliver
sub run {
- my ($class, $mime) = @_;
+ my ($class, $mime, $filter) = @_;
my $content_type = $mime->header('Content-Type') || 'text/plain';
@@ -38,6 +39,7 @@ sub run {
if ($content_type =~ m!\btext/plain\b!i) {
return 1; # yay, nothing to do
} elsif ($content_type =~ $MIME_HTML) {
+ $filter->reject(NO_HTML) if $filter;
# HTML-only, non-multipart
my $body = $mime->body;
my $ct_parsed = parse_content_type($content_type);
@@ -45,7 +47,7 @@ sub run {
replace_body($mime, $body);
return 1;
} elsif ($content_type =~ m!\bmultipart/!i) {
- return strip_multipart($mime, $content_type);
+ return strip_multipart($mime, $content_type, $filter);
} else {
replace_body($mime, "$content_type message scrubbed");
return 0;
@@ -109,9 +111,9 @@ sub dump_html {
# this is to correct user errors and not expected to cover all corner cases
# if users don't want to hit this, they should be sending text/plain messages
# unfortunately, too many people send HTML mail and we'll attempt to convert
-# it to something safer, smaller and harder-to-track.
+# it to something safer, smaller and harder-to-spy-on-users-with.
sub strip_multipart {
- my ($mime, $content_type) = @_;
+ my ($mime, $content_type, $filter) = @_;
my (@html, @keep);
my $rejected = 0;
@@ -133,6 +135,7 @@ sub strip_multipart {
if ($part_type =~ m!\btext/plain\b!i) {
push @keep, $part;
} elsif ($part_type =~ $MIME_HTML) {
+ $filter->reject(NO_HTML) if $filter;
push @html, $part;
} elsif ($part_type =~ $MIME_TEXT_ANY) {
# Give other text attachments the benefit of the doubt,
diff --git a/public-inbox-mda b/public-inbox-mda
index 047a482..4348cb2 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -38,7 +38,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
$filtered = undef;
$filter->simple($msg);
- if (PublicInbox::Filter->run($msg)) {
+ if (PublicInbox::Filter->run($msg, $filter)) {
# run spamc again on the HTML-free message
if (do_spamc($msg, \$filtered)) {
$msg = Email::MIME->new(\$filtered);
diff --git a/t/mda.t b/t/mda.t
index 53712a5..67e86f4 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -23,6 +23,7 @@ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
my $addr = 'test-public@example.com';
my $cfgpfx = "publicinbox.test";
my $failbox = "$home/fail.mbox";
+my $mime;
{
ok(-x "$main_bin/spamc",
@@ -235,7 +236,7 @@ EOF
)
];
$mid = 'multipart-html-sucks@11';
- my $mime = Email::MIME->create(
+ $mime = Email::MIME->create(
header_str => [
From => 'a@example.com',
Subject => 'blah',
@@ -276,6 +277,38 @@ EOF
ok(-d $faildir, "emergency exists");
my @new = glob("$faildir/new/*");
is(scalar(@new), 1, "message delivered");
+ is(unlink(@new), 1, "removed emergency message");
+
+ local $ENV{PATH} = $main_path;
+ $in = <<EOF;
+From: Faildir <faildir\@example.com>
+To: $addr
+Content-Type: text/html
+Message-ID: <faildir\@example.com>
+Subject: faildir subject
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+<html><body>bad</body></html>
+EOF
+ my $out = '';
+ my $err = '';
+ run([$mda], \$in, \$out, \$err);
+ isnt($?, 0, "mda exited with failure");
+ is(length $out, 0, 'nothing in stdout');
+ isnt(length $err, 0, 'error message in stderr');
+
+ @new = glob("$faildir/new/*");
+ is(scalar(@new), 0, "new message did not show up");
+
+ # reject multipart again
+ $in = $mime->as_string;
+ $err = '';
+ run([$mda], \$in, \$out, \$err);
+ isnt($?, 0, "mda exited with failure");
+ is(length $out, 0, 'nothing in stdout');
+ isnt(length $err, 0, 'error message in stderr');
+ @new = glob("$faildir/new/*");
+ is(scalar(@new), 0, "new message did not show up");
}
done_testing();
--
EW
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2015-07-14 21:02 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-14 21:02 [PATCH] reject HTML loudly and automatically Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).