* [PATCH 2/6] split out spamcheck/spamc to its own module.
2016-06-24 20:47 [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
@ 2016-06-24 20:47 ` Eric Wong
2016-06-24 20:47 ` [PATCH 3/6] document Filesys::Notify::Simple dependency Eric Wong
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
To: meta
This should hopefully make it easier to try other anti-spam
systems (or none at all) in the future.
---
MANIFEST | 2 +
lib/PublicInbox/Spamcheck/Spamc.pm | 94 ++++++++++++++++++++++++++++++++++++++
script/public-inbox-learn | 21 +++------
script/public-inbox-mda | 23 ++--------
t/spamcheck_spamc.t | 49 ++++++++++++++++++++
5 files changed, 156 insertions(+), 33 deletions(-)
create mode 100644 lib/PublicInbox/Spamcheck/Spamc.pm
create mode 100644 t/spamcheck_spamc.t
diff --git a/MANIFEST b/MANIFEST
index bc7d54c..834cb5d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -69,6 +69,7 @@ lib/PublicInbox/Search.pm
lib/PublicInbox/SearchIdx.pm
lib/PublicInbox/SearchMsg.pm
lib/PublicInbox/SearchView.pm
+lib/PublicInbox/Spamcheck/Spamc.pm
lib/PublicInbox/Spawn.pm
lib/PublicInbox/SpawnPP.pm
lib/PublicInbox/Thread.pm
@@ -133,6 +134,7 @@ t/psgi_attach.t
t/psgi_mount.t
t/qspawn.t
t/search.t
+t/spamcheck_spamc.t
t/spawn.t
t/utf8.mbox
t/view.t
diff --git a/lib/PublicInbox/Spamcheck/Spamc.pm b/lib/PublicInbox/Spamcheck/Spamc.pm
new file mode 100644
index 0000000..312e52d
--- /dev/null
+++ b/lib/PublicInbox/Spamcheck/Spamc.pm
@@ -0,0 +1,94 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spamcheck::Spamc;
+use strict;
+use warnings;
+use PublicInbox::Spawn qw(popen_rd spawn);
+use IO::File;
+use Fcntl qw(:DEFAULT SEEK_SET);
+
+sub new {
+ my ($class) = @_;
+ bless {
+ checkcmd => [qw(spamc -E --headers)],
+ hamcmd => [qw(spamc -L ham)],
+ spamcmd => [qw(spamc -L spam)],
+ }, $class;
+}
+
+sub spamcheck {
+ my ($self, $msg, $out) = @_;
+
+ my $tmp;
+ my $fd = _msg_to_fd($self, $msg, \$tmp);
+ my $rdr = { 0 => $fd };
+ my ($fh, $pid) = popen_rd($self->{checkcmd}, undef, $rdr);
+ defined $pid or die "failed to popen_rd spamc: $!\n";
+ my $r;
+ unless (ref $out) {
+ my $buf = '';
+ $out = \$buf;
+ }
+ do {
+ $r = sysread($fh, $$out, 65536, length($$out));
+ } while (defined($r) && $r != 0);
+ defined $r or die "read failed: $!";
+ close $fh or die "close failed: $!";
+ waitpid($pid, 0);
+ ($? || $$out eq '') ? 0 : 1;
+}
+
+sub hamlearn {
+ my ($self, $msg, $rdr) = @_;
+ _learn($self, $msg, $rdr, 'hamcmd');
+}
+
+sub spamlearn {
+ my ($self, $msg, $rdr) = @_;
+ _learn($self, $msg, $rdr, 'spamcmd');
+}
+
+sub _learn {
+ my ($self, $msg, $rdr, $field) = @_;
+ $rdr ||= {};
+ $rdr->{1} ||= $self->_devnull;
+ $rdr->{2} ||= $self->_devnull;
+ my $tmp;
+ $rdr->{0} = _msg_to_fd($self, $msg, \$tmp);
+ my $pid = spawn($self->{$field}, undef, $rdr);
+ waitpid($pid, 0);
+ !$?;
+}
+
+sub _devnull {
+ my ($self) = @_;
+ my $fd = $self->{-devnullfd};
+ return $fd if defined $fd;
+ open my $fh, '+>', '/dev/null' or
+ die "failed to open /dev/null: $!";
+ $self->{-devnull} = $fh;
+ $self->{-devnullfd} = fileno($fh);
+}
+
+sub _msg_to_fd {
+ my ($self, $msg, $tmpref) = @_;
+ my $tmpfh;
+ my $fd;
+ if (my $ref = ref($msg)) {
+
+ return $msg->fileno if $ref ne 'SCALAR' && $msg->can('fileno');
+
+ $tmpfh = IO::File->new_tmpfile;
+ $tmpfh->autoflush(1);
+ $msg = \($msg->as_string) if $ref ne 'SCALAR';
+ print $tmpfh $$msg or die "failed to print: $!";
+ sysseek($tmpfh, 0, SEEK_SET) or
+ die "sysseek(fh) failed: $!";
+ $$tmpref = $tmpfh;
+
+ return fileno($tmpfh);
+ }
+ $msg;
+}
+
+1;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index b05ef05..7ef2a31 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -14,12 +14,13 @@ use Email::MIME;
use Email::MIME::ContentType;
$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
use PublicInbox::Address;
-use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Spamcheck::Spamc;
my $train = shift or die "usage: $usage\n";
if ($train !~ /\A(?:ham|spam)\z/) {
die "`$train' not recognized.\nusage: $usage\n";
}
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
my $pi_config = PublicInbox::Config->new;
my $err;
my $mime = Email::MIME->new(eval {
@@ -27,19 +28,11 @@ my $mime = Email::MIME->new(eval {
my $data = scalar <STDIN>;
$data =~ s/\AFrom [^\r\n]*\r?\n//s;
eval {
- my @cmd = (qw(spamc -L), $train);
- my ($r, $w);
- pipe($r, $w) or die "pipe failed: $!";
- open my $null, '>', '/dev/null' or
- die "failed to open /dev/null: $!";
- my $nullfd = fileno($null);
- my %rdr = (0 => fileno($r), 1 => $nullfd, 2 => $nullfd);
- my $pid = spawn(\@cmd, undef, \%rdr);
- close $null;
- close $r or die "close \$r failed: $!";
- print $w $data or die "print \$w failed: $!";
- close $w or die "close \$w failed: $!";
- waitpid($pid, 0);
+ if ($train eq 'ham') {
+ $spamc->hamlearn(\$data);
+ } else {
+ $spamc->spamlearn(\$data);
+ }
die "spamc failed with: $?\n" if $?;
};
$err = $@;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 013642d..f739ad0 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -24,7 +24,7 @@ use PublicInbox::Import;
use PublicInbox::Git;
use PublicInbox::Emergency;
use PublicInbox::Filter::Base;
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spamcheck::Spamc;
# n.b: hopefully we can setup the emergency path without bailing due to
# user error, we really want to setup the emergency destination ASAP
@@ -44,9 +44,9 @@ my $main_repo = $dst->{mainrepo} or do_exit(1);
# pre-check, MDA has stricter rules than an importer might;
do_exit(0) unless PublicInbox::MDA->precheck($simple, $dst->{address});
-
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
$str = '';
-my $spam_ok = do_spamc($ems->fh, \$str);
+my $spam_ok = $spamc->spamcheck($ems->fh, \$str);
$simple = undef;
$emm = PublicInbox::Emergency->new($emergency);
$emm->prepare(\$str);
@@ -90,20 +90,5 @@ if (defined $im->add($mime)) {
$mime->header_obj->header_raw('Message-ID'),
" exists\n";
}
-do_exit(0);
-
-# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
-sub do_spamc {
- my ($in, $out) = @_;
- my $rdr = { 0 => fileno($in) };
- my ($fh, $pid) = popen_rd([qw/spamc -E --headers/], undef, $rdr);
- defined $pid or die "failed to popen_rd spamc: $!\n";
- my $r;
- do {
- $r = sysread($fh, $$out, 65536, length($$out));
- } while (defined($r) && $r != 0);
- close $fh or die "close failed: $!\n";
- waitpid($pid, 0);
- ($? || $$out eq '') ? 0 : 1;
-}
+do_exit(0);
diff --git a/t/spamcheck_spamc.t b/t/spamcheck_spamc.t
new file mode 100644
index 0000000..65ac5c2
--- /dev/null
+++ b/t/spamcheck_spamc.t
@@ -0,0 +1,49 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Cwd;
+use Email::Simple;
+use IO::File;
+use File::Temp qw/tempdir/;
+use Fcntl qw(:DEFAULT SEEK_SET);
+my $tmpdir = tempdir('spamcheck_spamc-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+
+use_ok 'PublicInbox::Spamcheck::Spamc';
+my $spamc = PublicInbox::Spamcheck::Spamc->new;
+$spamc->{checkcmd} = [qw(cat)];
+
+{
+ open my $fh, '+>', "$tmpdir/file" or die "open failed: $!";
+ ok(!$spamc->spamcheck($fh), 'empty '.ref($fh));
+}
+ok(!$spamc->spamcheck(IO::File->new_tmpfile), 'IO::File->new_tmpfile');
+
+my $dst = '';
+my $src = <<'EOF';
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+To: <e@example.com>
+From: <e@example.com>
+Subject: test
+Message-ID: <testmessage@example.com>
+
+EOF
+ok($spamc->spamcheck(Email::Simple->new($src), \$dst), 'Email::Simple works');
+is($dst, $src, 'input == output');
+
+$dst = '';
+$spamc->{checkcmd} = ['sh', '-c', 'cat; false'];
+ok(!$spamc->spamcheck(Email::Simple->new($src), \$dst), 'Failed check works');
+is($dst, $src, 'input == output for spammy example');
+
+for my $l (qw(ham spam)) {
+ my $file = "$tmpdir/$l.out";
+ $spamc->{$l.'cmd'} = ['tee', $file ];
+ my $method = $l.'learn';
+ ok($spamc->$method(Email::Simple->new($src)), "$method OK");
+ open my $fh, '<', $file or die "failed to open $file: $!";
+ is(eval { local $/, <$fh> }, $src, "$l command ran alright");
+}
+
+done_testing();
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/6] document Filesys::Notify::Simple dependency
2016-06-24 20:47 [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
2016-06-24 20:47 ` [PATCH 2/6] split out spamcheck/spamc to its own module Eric Wong
@ 2016-06-24 20:47 ` Eric Wong
2016-06-24 20:47 ` [PATCH 4/6] watch_maildir: rename _check_spam => _remove_spam Eric Wong
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
To: meta
And improve documentation for existing dependencies, too.
---
INSTALL | 24 ++++++++++++++----------
lib/PublicInbox/WatchMaildir.pm | 2 ++
t/watch_maildir.t | 5 +++++
3 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/INSTALL b/INSTALL
index e7d4b75..7f12fbe 100644
--- a/INSTALL
+++ b/INSTALL
@@ -19,8 +19,8 @@ standard MakeMaker installation (Perl)
make test
make install # root permissions may be needed
-Requirements (server MDA)
--------------------------
+Requirements
+------------
* git
* SpamAssassin (spamc/spamd)
@@ -36,19 +36,23 @@ Optional modules:
- Plack[1] libplack-perl
- Mail::Thread (2.5+)[1] libmail-thread-perl
- URI::Escape[1] liburi-perl
- - Search::Xapian[3] libsearch-xapian-perl
- - IO::Compress::Gzip[3] libio-compress-perl
+ - Search::Xapian[2][3] libsearch-xapian-perl
+ - IO::Compress::Gzip[3] perl-modules (or libio-compress-perl)
- DBI[3] libdbi-perl
- - DBD::SQLite[3] libdbd-sqlite3-perl
+ - DBD::SQLite[2][3] libdbd-sqlite3-perl
- Danga::Socket[4] libdanga-socket-perl
- - Net::Server[4] libnet-server-perl
+ - Net::Server[5] libnet-server-perl
+ - Filesys::Notify::Simple[6] libfilesys-notify-simple-perl
-[1] - Only required for serving/generating Atom and HTML pages.
-[3] - Optional for HTML web interface and HTTP/NNTP servers
-[4] - Optional for HTTP and NNTP servers
+[1] - Optional, needed for serving/generating Atom and HTML pages
+[2] - Optional, only required for NNTP server
+[3] - Optional, needed for gzipped mbox support over HTTP
+[4] - Optional, needed for bundled HTTP and NNTP servers
+[5] - Optional, needed for standalone daemonization of HTTP+NNTP servers
+[6] - Optional, needed for public-inbox-watch Maildir watcher
When installing Search::Xapian, make sure the underlying Xapian
-is patched against the index corruption bug documented in:
+installation is not affected by an index corruption bug:
https://bugs.debian.org/808610
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 4468a44..abf1df7 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -144,6 +144,8 @@ sub watch {
my $cb = sub { _try_fsn_paths($self, \@_) };
my $mdir = $self->{mdir};
+ # lazy load here, we may support watching via IMAP IDLE
+ # in the future...
require Filesys::Notify::Simple;
my $watcher = Filesys::Notify::Simple->new($mdir);
$watcher->wait($cb) while (1);
diff --git a/t/watch_maildir.t b/t/watch_maildir.t
index e8c9740..be1a312 100644
--- a/t/watch_maildir.t
+++ b/t/watch_maildir.t
@@ -4,6 +4,11 @@ use Test::More;
use File::Temp qw/tempdir/;
use Email::MIME;
use PublicInbox::Config;
+my @mods = qw(Filesys::Notify::Simple);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for watch_maildir.t" if $@;
+}
my $tmpdir = tempdir('watch_maildir-XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $git_dir = "$tmpdir/test.git";
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/6] watch_maildir: rename _check_spam => _remove_spam
2016-06-24 20:47 [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
2016-06-24 20:47 ` [PATCH 2/6] split out spamcheck/spamc to its own module Eric Wong
2016-06-24 20:47 ` [PATCH 3/6] document Filesys::Notify::Simple dependency Eric Wong
@ 2016-06-24 20:47 ` Eric Wong
2016-06-24 20:47 ` [PATCH 5/6] watch_maildir: implement optional spam checking Eric Wong
2016-06-24 20:47 ` [PATCH 6/6] watch_maildir: ignore Trash and Drafts, support Dovecot Eric Wong
4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
To: meta
We do not actually do spam checking here, but will
do spam checking before adding a message in the future.
---
lib/PublicInbox/WatchMaildir.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index abf1df7..c1fe81e 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -69,7 +69,7 @@ sub _try_fsn_paths {
_done_for_now($self);
}
-sub _check_spam {
+sub _remove_spam {
my ($self, $path) = @_;
$path =~ /:2,[A-R]*S[T-Z]*\z/ or return;
my $mime = _path_to_mime($path) or return;
@@ -121,7 +121,7 @@ sub _try_path {
return;
}
if (!ref($inbox) && $inbox eq 'watchspam') {
- return _check_spam($self, $path);
+ return _remove_spam($self, $path);
}
my $im = _importer_for($self, $inbox);
my $mime = _path_to_mime($path) or return;
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 5/6] watch_maildir: implement optional spam checking
2016-06-24 20:47 [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
` (2 preceding siblings ...)
2016-06-24 20:47 ` [PATCH 4/6] watch_maildir: rename _check_spam => _remove_spam Eric Wong
@ 2016-06-24 20:47 ` Eric Wong
2016-06-24 20:47 ` [PATCH 6/6] watch_maildir: ignore Trash and Drafts, support Dovecot Eric Wong
4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
To: meta
Mailing lists I watch and mirror may not have the best spam
filtering, and an extra layer should not hurt.
---
lib/PublicInbox/Import.pm | 6 +++++-
lib/PublicInbox/WatchMaildir.pm | 34 ++++++++++++++++++++++++++++++++--
t/import.t | 6 +++++-
t/watch_maildir.t | 34 ++++++++++++++++++++++++++++++++++
4 files changed, 76 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 5ffc26e..27f36a7 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -140,7 +140,7 @@ sub remove {
# returns undef on duplicate
sub add {
- my ($self, $mime) = @_; # mime = Email::MIME
+ my ($self, $mime, $check_cb) = @_; # mime = Email::MIME
my $from = $mime->header('From');
my ($email) = ($from =~ /([^<\s]+\@[^>\s]+)/g);
@@ -170,6 +170,10 @@ sub add {
# kill potentially confusing/misleading headers
$mime->header_set($_) for qw(bytes lines content-length status);
+ if ($check_cb) {
+ $mime = $check_cb->($mime) or return;
+ }
+
$mime = $mime->as_string;
my $blob = $self->{mark}++;
print $w "blob\nmark :$blob\ndata ", length($mime), "\n" or wfail;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c1fe81e..72bd3d0 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -13,7 +13,9 @@ use PublicInbox::Spawn qw(spawn);
sub new {
my ($class, $config) = @_;
- my (%mdmap, @mdir);
+ my (%mdmap, @mdir, $spamc);
+
+ # XXX is "publicinboxlearn" really a good namespace for this?
my $k = 'publicinboxlearn.watchspam';
if (my $spamdir = $config->{$k}) {
if ($spamdir =~ s/\Amaildir://) {
@@ -26,6 +28,21 @@ sub new {
warn "unsupported $k=$spamdir\n";
}
}
+
+ $k = 'publicinboxwatch.spamcheck';
+ my $spamcheck = $config->{$k};
+ if ($spamcheck) {
+ if ($spamcheck eq 'spamc') {
+ $spamcheck = 'PublicInbox::Spamcheck::Spamc';
+ }
+ if ($spamcheck =~ /::/) {
+ eval "require $spamcheck";
+ $spamcheck = _spamcheck_cb($spamcheck->new);
+ } else {
+ warn "unsupported $k=$spamcheck\n";
+ $spamcheck = undef;
+ }
+ }
foreach $k (keys %$config) {
$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
my $name = $1;
@@ -52,6 +69,7 @@ sub new {
my $mdre = join('|', map { quotemeta($_) } @mdir);
$mdre = qr!\A($mdre)/!;
bless {
+ spamcheck => $spamcheck,
mdmap => \%mdmap,
mdir => \@mdir,
mdre => $mdre,
@@ -136,7 +154,7 @@ sub _try_path {
}
_force_mid($mime);
- $im->add($mime);
+ $im->add($mime, $self->{spamcheck});
}
sub watch {
@@ -208,4 +226,16 @@ sub _scrubber_for {
undef;
}
+sub _spamcheck_cb {
+ my ($sc) = @_;
+ sub {
+ my ($mime) = @_;
+ my $tmp = '';
+ if ($sc->spamcheck($mime, \$tmp)) {
+ return Email::MIME->new(\$tmp);
+ }
+ undef;
+ }
+}
+
1;
diff --git a/t/import.t b/t/import.t
index 09c0036..73f92ad 100644
--- a/t/import.t
+++ b/t/import.t
@@ -30,7 +30,7 @@ is(scalar @revs, 1, 'one revision created');
$mime->header_set('Message-ID', '<b@example.com>');
$mime->header_set('Subject', 'msg2');
-like($im->add($mime), qr/\A:\d+\z/, 'added 2nd message');
+like($im->add($mime, sub { $mime }), qr/\A:\d+\z/, 'added 2nd message');
$im->done;
@revs = $git->qx(qw(rev-list HEAD));
is(scalar @revs, 2, '2 revisions exist');
@@ -61,5 +61,9 @@ is($mark, 'MISMATCH', 'mark == MISMATCH on mismatch');
is($msg->header('Message-ID'), '<a@example.com>', 'Message-ID matches');
isnt($msg->header('Subject'), $mime->header('Subject'), 'subject mismatch');
+$mime->header_set('Message-Id', '<failcheck@example.com>');
+is($im->add($mime, sub { undef }), undef, 'check callback fails');
+is($im->remove($mime), undef, 'message not added, so not removed');
+
$im->done;
done_testing();
diff --git a/t/watch_maildir.t b/t/watch_maildir.t
index be1a312..2138963 100644
--- a/t/watch_maildir.t
+++ b/t/watch_maildir.t
@@ -3,6 +3,7 @@
use Test::More;
use File::Temp qw/tempdir/;
use Email::MIME;
+use Cwd;
use PublicInbox::Config;
my @mods = qw(Filesys::Notify::Simple);
foreach my $mod (@mods) {
@@ -86,4 +87,37 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n);
is(scalar @list, 4, 'four revisions in rev-list');
}
+{
+ my $fail_bin = getcwd()."/t/fail-bin";
+ ok(-x "$fail_bin/spamc", "mock spamc exists");
+ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc ham mock
+ local $ENV{PATH} = $fail_path;
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ PublicInbox::WatchMaildir->new($config)->scan;
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ is(scalar @list, 0, 'tree has no files spamc checked');
+ is(unlink(glob("$maildir/new/*")), 1);
+}
+
+{
+ my $main_bin = getcwd()."/t/main-bin";
+ ok(-x "$main_bin/spamc", "mock spamc exists");
+ my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock
+ local $ENV{PATH} = $main_path;
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ $config->{'publicinboxwatch.spamcheck'} = 'spamc';
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ PublicInbox::WatchMaildir->new($config)->scan;
+ @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master));
+ is(scalar @list, 1, 'tree has one file after spamc checked');
+
+ # XXX: workaround some weird caching/memoization in cat-file,
+ # shouldn't be an issue in real-world use, though...
+ $git = PublicInbox::Git->new($git_dir);
+
+ my $mref = $git->cat_file('refs/heads/master:'.$list[0]);
+ like($$mref, qr/something\n\z/s, 'message scrubbed on import');
+}
+
done_testing;
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 6/6] watch_maildir: ignore Trash and Drafts, support Dovecot
2016-06-24 20:47 [PATCH 1/6] implement ListMirror SpamAssassin plugin Eric Wong
` (3 preceding siblings ...)
2016-06-24 20:47 ` [PATCH 5/6] watch_maildir: implement optional spam checking Eric Wong
@ 2016-06-24 20:47 ` Eric Wong
4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-06-24 20:47 UTC (permalink / raw)
To: meta
Trashed messages and drafts are probably not intended for
importing, so do not import them. Dovecot uses extra flags via
lowercase letters, so we must support those (as that's the
server I use).
---
lib/PublicInbox/WatchMaildir.pm | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 72bd3d0..b25704e 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -1,5 +1,8 @@
# Copyright (C) 2016 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# ref: https://cr.yp.to/proto/maildir.html
+# http://wiki2.dovecot.org/MailboxFormat/Maildir
package PublicInbox::WatchMaildir;
use strict;
use warnings;
@@ -89,7 +92,7 @@ sub _try_fsn_paths {
sub _remove_spam {
my ($self, $path) = @_;
- $path =~ /:2,[A-R]*S[T-Z]*\z/ or return;
+ $path =~ /:2,[A-R]*S[T-Z]*\z/i or return;
my $mime = _path_to_mime($path) or return;
_force_mid($mime);
foreach my $inbox (values %{$self->{mdmap}}) {
@@ -127,7 +130,11 @@ sub _force_mid {
sub _try_path {
my ($self, $path) = @_;
my @p = split(m!/+!, $path);
- return unless $p[-1] =~ /\A[a-zA-Z0-9][\w:,=\.]+\z/;
+ return if $p[-1] !~ /\A[a-zA-Z0-9][\w:,=\.]+\z/;
+ if ($p[-1] =~ /:2,([A-Z]+)\z/i) {
+ my $flags = $1;
+ return if $flags =~ /[DT]/; # no [D]rafts or [T]rashed mail
+ }
return unless -f $path;
if ($path !~ $self->{mdre}) {
warn "unrecognized path: $path\n";
^ permalink raw reply related [flat|nested] 6+ messages in thread