* [PATCH] filter/rubylang: fix SQLite DB lifetime problems
@ 2019-01-05 11:09 Eric Wong
2019-01-07 5:22 ` [2/1 PATCH] t/mda_filter_rubylang.t: set PI_EMERGENCY for -mda Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2019-01-05 11:09 UTC (permalink / raw)
To: meta
Clearly the AltId stuff was never tested for v2. Ensure
this tricky filter (which reuses Msgmap to avoid introducing
new serial numbers) doesn't trigger deadlocks SQLite due
to opening a DB for writing multiple times.
I went through several iterations of this change before
going with this one, which is the least intrusive I could
fine.
---
MANIFEST | 2 +
lib/PublicInbox/InboxWritable.pm | 14 +++-
lib/PublicInbox/V2Writable.pm | 13 ++++
lib/PublicInbox/WatchMaildir.pm | 5 +-
script/public-inbox-mda | 1 +
t/mda_filter_rubylang.t | 69 +++++++++++++++++++
t/watch_filter_rubylang.t | 109 +++++++++++++++++++++++++++++++
7 files changed, 208 insertions(+), 5 deletions(-)
create mode 100644 t/mda_filter_rubylang.t
create mode 100644 t/watch_filter_rubylang.t
diff --git a/MANIFEST b/MANIFEST
index d56cd85..e4f3df8 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -175,6 +175,7 @@ t/init.t
t/linkify.t
t/main-bin/spamc
t/mda.t
+t/mda_filter_rubylang.t
t/mid.t
t/mime.t
t/msg_iter.t
@@ -212,5 +213,6 @@ t/v2mirror.t
t/v2reindex.t
t/v2writable.t
t/view.t
+t/watch_filter_rubylang.t
t/watch_maildir.t
t/watch_maildir_v2.t
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 87c9ada..2f1ca6f 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -46,9 +46,16 @@ sub importer {
}
sub filter {
- my ($self) = @_;
+ my ($self, $im) = @_;
my $f = $self->{filter};
if ($f && $f =~ /::/) {
+ # v2 keeps msgmap open, which causes conflicts for filters
+ # such as PublicInbox::Filter::RubyLang which overload msgmap
+ # for a predictable serial number.
+ if ($im && ($self->{version} || 1) >= 2 && $self->{altid}) {
+ $im->done;
+ }
+
my @args = (-inbox => $self);
# basic line splitting, only
# Perhaps we can have proper quote splitting one day...
@@ -100,7 +107,7 @@ ($)
sub import_maildir {
my ($self, $dir) = @_;
my $im = $self->importer(1);
- my $filter = $self->filter;
+
foreach my $sub (qw(cur new tmp)) {
-d "$dir/$sub" or die "$dir is not a Maildir (missing $sub)\n";
}
@@ -109,7 +116,8 @@ sub import_maildir {
while (defined(my $fn = readdir($dh))) {
next unless is_maildir_basename($fn);
my $mime = maildir_file_load("$dir/$fn") or next;
- if ($filter) {
+
+ if (my $filter = $self->filter($im)) {
my $ret = $filter->scrub($mime) or return;
return if $ret == REJECT();
$mime = $ret;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 0731964..93babed 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -163,6 +163,19 @@ sub num_for {
return if $existing;
}
+ # AltId may pre-populate article numbers (e.g. X-Mail-Count
+ # or NNTP article number), use that article number if it's
+ # not in Over.
+ my $altid = $self->{-inbox}->{altid};
+ if ($altid && grep(/:file=msgmap\.sqlite3\z/, @$altid)) {
+ my $num = $self->{mm}->num_for($mid);
+
+ if (defined $num && !$self->{over}->get_art($num)) {
+ $$mid0 = $mid;
+ return $num;
+ }
+ }
+
# very unlikely:
warn "<$mid> reused for mismatched content\n";
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 84080ba..2d4c6f4 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -121,7 +121,7 @@ sub _remove_spam {
eval {
my $im = _importer_for($self, $ibx);
$im->remove($mime, 'spam');
- if (my $scrub = $ibx->filter) {
+ if (my $scrub = $ibx->filter($im)) {
my $scrubbed = $scrub->scrub($mime, 1);
$scrubbed or return;
$scrubbed == REJECT() and return;
@@ -159,7 +159,8 @@ sub _try_path {
my $v = $mime->header_obj->header_raw($wm->[0]);
next unless ($v && $v =~ $wm->[1]);
}
- if (my $scrub = $ibx->filter) {
+
+ if (my $scrub = $ibx->filter($im)) {
my $ret = $scrub->scrub($mime) or next;
$ret == REJECT() and next;
$mime = $ret;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 183b915..7486059 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -81,6 +81,7 @@ if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message
$! = 65; # EX_DATAERR 5.6.0 data format error
die $filter->err, "\n";
} # else { accept
+$filter = undef;
PublicInbox::MDA->set_list_headers($mime, $dst);
my $im = $dst->importer(0);
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
new file mode 100644
index 0000000..cb6da4b
--- /dev/null
+++ b/t/mda_filter_rubylang.t
@@ -0,0 +1,69 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(DBD::SQLite Search::Xapian IPC::Run);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for mda_filter_rubylang.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $pi_config = "$tmpdir/pi_config";
+local $ENV{PI_CONFIG} = $pi_config;
+my $mda = 'blib/script/public-inbox-mda';
+my @cfg = ('git', 'config', "--file=$pi_config");
+is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
+
+for my $v (qw(V1 V2)) {
+ my @warn;
+ $SIG{__WARN__} = sub { push @warn, @_ };
+ my $cfgpfx = "publicinbox.$v";
+ my $mainrepo = "$tmpdir/$v";
+ my $addr = "test-$v\@example.com";
+ my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+ "http://example.com/$v", $addr);
+ is(system(@cmd), 0, 'public-inbox init OK');
+ if ($v eq 'V1') {
+ is(system('blib/script/public-inbox-index', $mainrepo), 0);
+ }
+ is(system(@cfg, "$cfgpfx.filter", 'PublicInbox::Filter::RubyLang'), 0);
+ is(system(@cfg, "$cfgpfx.altid",
+ 'serial:alerts:file=msgmap.sqlite3'), 0);
+
+ for my $i (1..2) {
+ local $ENV{ORIGINAL_RECIPIENT} = $addr;
+ my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+ ok(IPC::Run::run([$mda], \"$msg"), 'message delivered');
+ }
+ my $config = PublicInbox::Config->new;
+ my $ibx = $config->lookup_name($v);
+
+ # make sure all serials are searchable:
+ my ($tot, $msgs);
+ for my $i (1..2) {
+ ($tot, $msgs) = $ibx->search->query("alerts:$i");
+ is($tot, 1, "got one result for alerts:$i");
+ is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+ }
+ is_deeply([], \@warn, 'no warnings');
+
+ # TODO: public-inbox-learn doesn't know about filters
+ # (but -watch does)
+}
+
+done_testing();
diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t
new file mode 100644
index 0000000..3256555
--- /dev/null
+++ b/t/watch_filter_rubylang.t
@@ -0,0 +1,109 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+my @mods = qw(Filesys::Notify::Simple DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for watch_filter_rubylang_v2.t" if $@;
+}
+
+use_ok 'PublicInbox::V2Writable';
+use_ok 'PublicInbox::WatchMaildir';
+use_ok 'PublicInbox::Emergency';
+my $tmpdir = tempdir('watch-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+local $ENV{PI_CONFIG} = "$tmpdir/pi_config";
+
+for my $v (qw(V1 V2)) {
+ my @warn;
+ $SIG{__WARN__} = sub { push @warn, @_ };
+ my $cfgpfx = "publicinbox.$v";
+ my $mainrepo = "$tmpdir/$v";
+ my $maildir = "$tmpdir/md-$v";
+ my $spamdir = "$tmpdir/spam-$v";
+ my $addr = "test-$v\@example.com";
+ my @cmd = ('blib/script/public-inbox-init', "-$v", $v, $mainrepo,
+ "http://example.com/$v", $addr);
+ is(system(@cmd), 0, 'public-inbox init OK');
+ if ($v eq 'V1') {
+ is(system('blib/script/public-inbox-index', $mainrepo), 0);
+ }
+ PublicInbox::Emergency->new($spamdir);
+
+ for my $i (1..15) {
+ my $msg = <<EOF;
+From: user\@example.com
+To: $addr
+Subject: blah $i
+X-Mail-Count: $i
+Message-Id: <a.$i\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+something
+EOF
+ PublicInbox::Emergency->new($maildir)->prepare(\$msg);
+ }
+
+ my $spam = <<EOF;
+From: spammer\@example.com
+To: $addr
+Subject: spam
+X-Mail-Count: 99
+Message-Id: <a.99\@b.com>
+Date: Sat, 05 Jan 2019 04:19:17 +0000
+
+spam
+EOF
+ PublicInbox::Emergency->new($maildir)->prepare(\"$spam");
+
+ my %orig = (
+ "$cfgpfx.address" => $addr,
+ "$cfgpfx.mainrepo" => $mainrepo,
+ "$cfgpfx.watch" => "maildir:$maildir",
+ "$cfgpfx.filter" => 'PublicInbox::Filter::RubyLang',
+ "$cfgpfx.altid" => 'serial:alerts:file=msgmap.sqlite3',
+ "publicinboxwatch.watchspam" => "maildir:$spamdir",
+ );
+ my $config = PublicInbox::Config->new({%orig});
+ my $ibx = $config->lookup_name($v);
+ ok($ibx, 'found inbox by name');
+
+ my $w = PublicInbox::WatchMaildir->new($config);
+ for my $i (1..2) {
+ $w->scan('full');
+ }
+
+ # make sure all serials are searchable:
+ my ($tot, $msgs);
+ for my $i (1..15) {
+ ($tot, $msgs) = $ibx->search->query("alerts:$i");
+ is($tot, 1, "got one result for alerts:$i");
+ is($msgs->[0]->{mid}, "a.$i\@b.com", "got expected MID for $i");
+ }
+ ($tot, undef) = $ibx->search->query('b:spam');
+ is($tot, 1, 'got spam message');
+
+ my $nr = unlink <$maildir/new/*>;
+ is(16, $nr);
+ {
+ PublicInbox::Emergency->new($spamdir)->prepare(\$spam);
+ my @new = glob("$spamdir/new/*");
+ my @p = split(m!/+!, $new[0]);
+ ok(link($new[0], "$spamdir/cur/".$p[-1].":2,S"));
+ is(unlink($new[0]), 1);
+ }
+ $w->scan('full');
+
+ $config = PublicInbox::Config->new({%orig});
+ $ibx = $config->lookup_name($v);
+ ($tot, undef) = $ibx->search->reopen->query('b:spam');
+ is($tot, 0, 'spam removed');
+
+ is_deeply([], \@warn, 'no warnings');
+}
+
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [2/1 PATCH] t/mda_filter_rubylang.t: set PI_EMERGENCY for -mda
2019-01-05 11:09 [PATCH] filter/rubylang: fix SQLite DB lifetime problems Eric Wong
@ 2019-01-07 5:22 ` Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2019-01-07 5:22 UTC (permalink / raw)
To: meta
Tests should not write to the default ~/.public-inbox/emergency
---
t/mda_filter_rubylang.t | 1 +
1 file changed, 1 insertion(+)
diff --git a/t/mda_filter_rubylang.t b/t/mda_filter_rubylang.t
index 0fb65a4..583a139 100644
--- a/t/mda_filter_rubylang.t
+++ b/t/mda_filter_rubylang.t
@@ -16,6 +16,7 @@ use_ok 'PublicInbox::V2Writable';
my $tmpdir = tempdir('mda-XXXXXX', TMPDIR => 1, CLEANUP => 1);
my $pi_config = "$tmpdir/pi_config";
local $ENV{PI_CONFIG} = $pi_config;
+local $ENV{PI_EMERGENCY} = "$tmpdir/emergency";
my $mda = 'blib/script/public-inbox-mda';
my @cfg = ('git', 'config', "--file=$pi_config");
is(system(@cfg, 'publicinboxmda.spamcheck', 'none'), 0);
--
EW
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-01-07 5:22 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-01-05 11:09 [PATCH] filter/rubylang: fix SQLite DB lifetime problems Eric Wong
2019-01-07 5:22 ` [2/1 PATCH] t/mda_filter_rubylang.t: set PI_EMERGENCY for -mda Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).