unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/2] watch|mda|purge: Filter::*->scrub is destructive
Date: Mon, 30 Dec 2024 22:28:45 +0000	[thread overview]
Message-ID: <20241230222846.2251518-2-e@80x24.org> (raw)
In-Reply-To: <20241230222846.2251518-1-e@80x24.org>

-watch was failing to properly account for inboxes having
different filters and ->scrub behavior during message removal
(but not adds).  Explicitly duplicate and simplify the -watch
code for handling message removal and update the rest of the
callers to reflect the standardized destructive behavior of
->scrub.  While most filters were destructive anyways, the Vger
filter was a notable exception and updated to be destructive.

We'll also update some variables from `$mime' uses to `$eml' to
reflect the switch from (Email|PublicInbox)::MIME to
PublicInbox::Eml years ago.

Finally, the Vger filter includes updated comments and uses
v5.10.1+ features, now.
---
 lib/PublicInbox/Filter/Vger.pm   | 15 ++++++-------
 lib/PublicInbox/InboxWritable.pm |  2 --
 lib/PublicInbox/Watch.pm         | 36 +++++++++++---------------------
 script/public-inbox-mda          | 19 +++++++----------
 script/public-inbox-purge        | 12 +++++------
 5 files changed, 33 insertions(+), 51 deletions(-)

diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 5b3c0277..2202a217 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -1,9 +1,10 @@
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Filter for vger.kernel.org list trailer
 package PublicInbox::Filter::Vger;
-use base qw(PublicInbox::Filter::Base);
+use v5.10.1; # check regexps before v5.12
+use parent qw(PublicInbox::Filter::Base);
 use strict;
 use PublicInbox::Eml;
 
@@ -18,16 +19,16 @@ my $l3 =
 my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
 
 sub scrub {
-	my ($self, $mime) = @_;
-	my $s = $mime->as_string;
+	my ($self, $eml) = @_;
+	my $s = $eml->as_string;
 
-	# the vger appender seems to only work on the raw string,
+	# the old vger appender seemed to only work on the raw string,
 	# so in multipart (e.g. GPG-signed) messages, the list trailer
 	# becomes invisible to MIME-aware email clients.
 	if ($s =~ s/$l0\n$l1\n$l2\n$l3\n(?:$l4\n)?\n*\z//os) {
-		$mime = PublicInbox::Eml->new(\$s);
+		$_[1] = $eml= PublicInbox::Eml->new(\$s);
 	}
-	$self->ACCEPT($mime);
+	$self->ACCEPT($eml);
 }
 
 sub delivery {
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index b995a8ad..18412cc3 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -129,7 +129,6 @@ sub _each_maildir_eml {
 	if ($self && (my $filter = $self->filter($im))) {
 		my $ret = $filter->scrub($eml) or return;
 		return if $ret == REJECT();
-		$eml = $ret;
 	}
 	$im->add($eml);
 }
@@ -153,7 +152,6 @@ sub _mbox_eml_cb { # MboxReader->mbox* callback
 	if ($filter) {
 		my $ret = $filter->scrub($eml) or return;
 		return if $ret == REJECT();
-		$eml = $ret;
 	}
 	$im->add($eml);
 }
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 5cde1d80..c270f587 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -176,30 +176,19 @@ sub remove_eml_i { # each_inbox callback
 	eval {
 		# try to avoid taking a lock or unnecessary spawning
 		my $im = $self->{importers}->{"$ibx"};
-		my $scrubbed;
+		my @eml = ($eml);
+		if (my $filter = $ibx->filter($im)) {
+			my $tmp = PublicInbox::Eml->new(\($eml->as_string));
+			my $ret = $filter->scrub($tmp, 1);
+			push @eml, $tmp if $ret && $ret != REJECT;
+		}
+
 		if ((!$im || !$im->active) && $ibx->over) {
-			if (content_exists($ibx, $eml)) {
-				# continue
-			} elsif (my $scrub = $ibx->filter($im)) {
-				$scrubbed = $scrub->scrub($eml, 1);
-				if ($scrubbed && $scrubbed != REJECT &&
-					  !content_exists($ibx, $scrubbed)) {
-					return;
-				}
-			} else {
-				return;
-			}
+			@eml = grep { content_exists($ibx, $_) } @eml or return;
 		}
 
 		$im //= _importer_for($self, $ibx); # may spawn fast-import
-		$im->remove($eml, 'spam');
-		$scrubbed //= do {
-			my $scrub = $ibx->filter($im);
-			$scrub ? $scrub->scrub($eml, 1) : undef;
-		};
-		if ($scrubbed && $scrubbed != REJECT) {
-			$im->remove($scrubbed, 'spam');
-		}
+		$im->remove($_, 'spam') for @eml;
 	};
 	if ($@) {
 		warn "error removing spam at: $loc from $ibx->{name}: $@\n";
@@ -232,10 +221,9 @@ sub import_eml ($$$) {
 	}
 	eval {
 		my $im = _importer_for($self, $ibx);
-		if (my $scrub = $ibx->filter($im)) {
-			my $scrubbed = $scrub->scrub($eml) or return;
-			$scrubbed == REJECT and return;
-			$eml = $scrubbed;
+		if (my $filter = $ibx->filter($im)) {
+			my $ret = $filter->scrub($eml) or return;
+			$ret == REJECT and return;
 		}
 		$im->add($eml, $self->{spamcheck});
 	};
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 74202912..9d68ea35 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -120,26 +120,21 @@ my @rejects;
 for my $ibx (@$dests) {
 	mda_filter_adjust($ibx);
 	my $filter = $ibx->filter;
-	my $mime = PublicInbox::Eml->new($str);
-	my $ret = $filter->delivery($mime);
-	if (ref($ret) && ($ret->isa('PublicInbox::Eml') ||
-			$ret->isa('Email::MIME'))) { # filter altered message
-		$mime = $ret;
-	} elsif ($ret == PublicInbox::Filter::Base::IGNORE) {
-		next; # nothing, keep looping
-	} elsif ($ret == PublicInbox::Filter::Base::REJECT) {
+	my $eml = PublicInbox::Eml->new($str);
+	my $ret = $filter->delivery($eml) or next; # $ret == IGNORE
+	if ($ret == PublicInbox::Filter::Base::REJECT) {
 		push @rejects, $filter->err;
 		next;
-	}
+	} # else success
 
-	PublicInbox::MDA->set_list_headers($mime, $ibx);
+	PublicInbox::MDA->set_list_headers($eml, $ibx);
 	my $im = $ibx->importer(0);
-	if (defined $im->add($mime)) {
+	if (defined $im->add($eml)) {
 		# ->abort is idempotent, no emergency if a single
 		# destination succeeds
 		$emm->abort;
 	} else { # v1-only
-		my $mid = $mime->header_raw('Message-ID');
+		my $mid = $eml->header_raw('Message-ID');
 		# this message is similar to what ssoma-mda shows:
 		print STDERR "CONFLICT: Message-ID: $mid exists\n";
 	}
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
index 618cfec4..381f58cd 100755
--- a/script/public-inbox-purge
+++ b/script/public-inbox-purge
@@ -38,16 +38,16 @@ PublicInbox::Eml::strip_from($data);
 my $n_purged = 0;
 
 foreach my $ibx (@ibxs) {
-	my $mime = PublicInbox::Eml->new($data);
+	my $eml = PublicInbox::Eml->new($data);
 	my $v2w = PublicInbox::V2Writable->new($ibx, 0);
 
-	my $commits = $v2w->purge($mime) || [];
+	my $commits = $v2w->purge($eml) || [];
 
-	if (my $scrub = $ibx->filter($v2w)) {
-		my $scrubbed = $scrub->scrub($mime, 1);
+	if (my $filter = $ibx->filter($v2w)) {
+		my $ret = $filter->scrub($eml, 1); # destructive
 
-		if ($scrubbed && $scrubbed != REJECT()) {
-			my $scrub_commits = $v2w->purge($scrubbed);
+		if ($ret && $ret != REJECT) {
+			my $scrub_commits = $v2w->purge($eml);
 			push @$commits, @$scrub_commits if $scrub_commits;
 		}
 	}

  reply	other threads:[~2024-12-30 22:28 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-30 22:28 [PATCH 0/2] watch-related fixes for filter users Eric Wong
2024-12-30 22:28 ` Eric Wong [this message]
2024-12-30 22:28 ` [PATCH 2/2] watch: import_eml: avoid Eml dup for non-scrub case Eric Wong
2025-01-01 23:11   ` [PATCH 3/2] watch: don't propagate header changes to other inboxes Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241230222846.2251518-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).