From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/2] watch|mda|purge: Filter::*->scrub is destructive
Date: Mon, 30 Dec 2024 22:28:45 +0000 [thread overview]
Message-ID: <20241230222846.2251518-2-e@80x24.org> (raw)
In-Reply-To: <20241230222846.2251518-1-e@80x24.org>
-watch was failing to properly account for inboxes having
different filters and ->scrub behavior during message removal
(but not adds). Explicitly duplicate and simplify the -watch
code for handling message removal and update the rest of the
callers to reflect the standardized destructive behavior of
->scrub. While most filters were destructive anyways, the Vger
filter was a notable exception and updated to be destructive.
We'll also update some variables from `$mime' uses to `$eml' to
reflect the switch from (Email|PublicInbox)::MIME to
PublicInbox::Eml years ago.
Finally, the Vger filter includes updated comments and uses
v5.10.1+ features, now.
---
lib/PublicInbox/Filter/Vger.pm | 15 ++++++-------
lib/PublicInbox/InboxWritable.pm | 2 --
lib/PublicInbox/Watch.pm | 36 +++++++++++---------------------
script/public-inbox-mda | 19 +++++++----------
script/public-inbox-purge | 12 +++++------
5 files changed, 33 insertions(+), 51 deletions(-)
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 5b3c0277..2202a217 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -1,9 +1,10 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Filter for vger.kernel.org list trailer
package PublicInbox::Filter::Vger;
-use base qw(PublicInbox::Filter::Base);
+use v5.10.1; # check regexps before v5.12
+use parent qw(PublicInbox::Filter::Base);
use strict;
use PublicInbox::Eml;
@@ -18,16 +19,16 @@ my $l3 =
my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
sub scrub {
- my ($self, $mime) = @_;
- my $s = $mime->as_string;
+ my ($self, $eml) = @_;
+ my $s = $eml->as_string;
- # the vger appender seems to only work on the raw string,
+ # the old vger appender seemed to only work on the raw string,
# so in multipart (e.g. GPG-signed) messages, the list trailer
# becomes invisible to MIME-aware email clients.
if ($s =~ s/$l0\n$l1\n$l2\n$l3\n(?:$l4\n)?\n*\z//os) {
- $mime = PublicInbox::Eml->new(\$s);
+ $_[1] = $eml= PublicInbox::Eml->new(\$s);
}
- $self->ACCEPT($mime);
+ $self->ACCEPT($eml);
}
sub delivery {
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index b995a8ad..18412cc3 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -129,7 +129,6 @@ sub _each_maildir_eml {
if ($self && (my $filter = $self->filter($im))) {
my $ret = $filter->scrub($eml) or return;
return if $ret == REJECT();
- $eml = $ret;
}
$im->add($eml);
}
@@ -153,7 +152,6 @@ sub _mbox_eml_cb { # MboxReader->mbox* callback
if ($filter) {
my $ret = $filter->scrub($eml) or return;
return if $ret == REJECT();
- $eml = $ret;
}
$im->add($eml);
}
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 5cde1d80..c270f587 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -176,30 +176,19 @@ sub remove_eml_i { # each_inbox callback
eval {
# try to avoid taking a lock or unnecessary spawning
my $im = $self->{importers}->{"$ibx"};
- my $scrubbed;
+ my @eml = ($eml);
+ if (my $filter = $ibx->filter($im)) {
+ my $tmp = PublicInbox::Eml->new(\($eml->as_string));
+ my $ret = $filter->scrub($tmp, 1);
+ push @eml, $tmp if $ret && $ret != REJECT;
+ }
+
if ((!$im || !$im->active) && $ibx->over) {
- if (content_exists($ibx, $eml)) {
- # continue
- } elsif (my $scrub = $ibx->filter($im)) {
- $scrubbed = $scrub->scrub($eml, 1);
- if ($scrubbed && $scrubbed != REJECT &&
- !content_exists($ibx, $scrubbed)) {
- return;
- }
- } else {
- return;
- }
+ @eml = grep { content_exists($ibx, $_) } @eml or return;
}
$im //= _importer_for($self, $ibx); # may spawn fast-import
- $im->remove($eml, 'spam');
- $scrubbed //= do {
- my $scrub = $ibx->filter($im);
- $scrub ? $scrub->scrub($eml, 1) : undef;
- };
- if ($scrubbed && $scrubbed != REJECT) {
- $im->remove($scrubbed, 'spam');
- }
+ $im->remove($_, 'spam') for @eml;
};
if ($@) {
warn "error removing spam at: $loc from $ibx->{name}: $@\n";
@@ -232,10 +221,9 @@ sub import_eml ($$$) {
}
eval {
my $im = _importer_for($self, $ibx);
- if (my $scrub = $ibx->filter($im)) {
- my $scrubbed = $scrub->scrub($eml) or return;
- $scrubbed == REJECT and return;
- $eml = $scrubbed;
+ if (my $filter = $ibx->filter($im)) {
+ my $ret = $filter->scrub($eml) or return;
+ $ret == REJECT and return;
}
$im->add($eml, $self->{spamcheck});
};
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 74202912..9d68ea35 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -120,26 +120,21 @@ my @rejects;
for my $ibx (@$dests) {
mda_filter_adjust($ibx);
my $filter = $ibx->filter;
- my $mime = PublicInbox::Eml->new($str);
- my $ret = $filter->delivery($mime);
- if (ref($ret) && ($ret->isa('PublicInbox::Eml') ||
- $ret->isa('Email::MIME'))) { # filter altered message
- $mime = $ret;
- } elsif ($ret == PublicInbox::Filter::Base::IGNORE) {
- next; # nothing, keep looping
- } elsif ($ret == PublicInbox::Filter::Base::REJECT) {
+ my $eml = PublicInbox::Eml->new($str);
+ my $ret = $filter->delivery($eml) or next; # $ret == IGNORE
+ if ($ret == PublicInbox::Filter::Base::REJECT) {
push @rejects, $filter->err;
next;
- }
+ } # else success
- PublicInbox::MDA->set_list_headers($mime, $ibx);
+ PublicInbox::MDA->set_list_headers($eml, $ibx);
my $im = $ibx->importer(0);
- if (defined $im->add($mime)) {
+ if (defined $im->add($eml)) {
# ->abort is idempotent, no emergency if a single
# destination succeeds
$emm->abort;
} else { # v1-only
- my $mid = $mime->header_raw('Message-ID');
+ my $mid = $eml->header_raw('Message-ID');
# this message is similar to what ssoma-mda shows:
print STDERR "CONFLICT: Message-ID: $mid exists\n";
}
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 618cfec4..381f58cd 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -38,16 +38,16 @@ PublicInbox::Eml::strip_from($data);
my $n_purged = 0;
foreach my $ibx (@ibxs) {
- my $mime = PublicInbox::Eml->new($data);
+ my $eml = PublicInbox::Eml->new($data);
my $v2w = PublicInbox::V2Writable->new($ibx, 0);
- my $commits = $v2w->purge($mime) || [];
+ my $commits = $v2w->purge($eml) || [];
- if (my $scrub = $ibx->filter($v2w)) {
- my $scrubbed = $scrub->scrub($mime, 1);
+ if (my $filter = $ibx->filter($v2w)) {
+ my $ret = $filter->scrub($eml, 1); # destructive
- if ($scrubbed && $scrubbed != REJECT()) {
- my $scrub_commits = $v2w->purge($scrubbed);
+ if ($ret && $ret != REJECT) {
+ my $scrub_commits = $v2w->purge($eml);
push @$commits, @$scrub_commits if $scrub_commits;
}
}
next prev parent reply other threads:[~2024-12-30 22:28 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-30 22:28 [PATCH 0/2] watch-related fixes for filter users Eric Wong
2024-12-30 22:28 ` Eric Wong [this message]
2024-12-30 22:28 ` [PATCH 2/2] watch: import_eml: avoid Eml dup for non-scrub case Eric Wong
2025-01-01 23:11 ` [PATCH 3/2] watch: don't propagate header changes to other inboxes Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241230222846.2251518-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).