From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.3 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,URIBL_BLACK shortcircuit=no autolearn=no autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 937AF1F4D0 for ; Mon, 30 Dec 2024 22:28:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1735597726; bh=og1t7t0fJIDD5NYH0Ftzrlv+A2F6aa+C/1oBNT/REXs=; h=From:To:Subject:Date:In-Reply-To:References:From; b=mjkgOZsJSmPUEEWq94muy79G8ghdQL/nXGN6F14qcXUyaTfvU8q8ZkCvsTvs/CZbR /eaY0w1y8eWB2IxP0Rm/J5rTAB1eyPsjk2Zh+lG71/5RPu+Pe0SO45ST5jo5kLa7PR OqrDQmAtVVc0NH3lCP1xOo37ss8fXk8xjJhUAGCs= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] watch|mda|purge: Filter::*->scrub is destructive Date: Mon, 30 Dec 2024 22:28:45 +0000 Message-ID: <20241230222846.2251518-2-e@80x24.org> In-Reply-To: <20241230222846.2251518-1-e@80x24.org> References: <20241230222846.2251518-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: -watch was failing to properly account for inboxes having different filters and ->scrub behavior during message removal (but not adds). Explicitly duplicate and simplify the -watch code for handling message removal and update the rest of the callers to reflect the standardized destructive behavior of ->scrub. While most filters were destructive anyways, the Vger filter was a notable exception and updated to be destructive. We'll also update some variables from `$mime' uses to `$eml' to reflect the switch from (Email|PublicInbox)::MIME to PublicInbox::Eml years ago. Finally, the Vger filter includes updated comments and uses v5.10.1+ features, now. --- lib/PublicInbox/Filter/Vger.pm | 15 ++++++------- lib/PublicInbox/InboxWritable.pm | 2 -- lib/PublicInbox/Watch.pm | 36 +++++++++++--------------------- script/public-inbox-mda | 19 +++++++---------- script/public-inbox-purge | 12 +++++------ 5 files changed, 33 insertions(+), 51 deletions(-) diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm index 5b3c0277..2202a217 100644 --- a/lib/PublicInbox/Filter/Vger.pm +++ b/lib/PublicInbox/Filter/Vger.pm @@ -1,9 +1,10 @@ -# Copyright (C) 2016-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # Filter for vger.kernel.org list trailer package PublicInbox::Filter::Vger; -use base qw(PublicInbox::Filter::Base); +use v5.10.1; # check regexps before v5.12 +use parent qw(PublicInbox::Filter::Base); use strict; use PublicInbox::Eml; @@ -18,16 +19,16 @@ my $l3 = my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!; sub scrub { - my ($self, $mime) = @_; - my $s = $mime->as_string; + my ($self, $eml) = @_; + my $s = $eml->as_string; - # the vger appender seems to only work on the raw string, + # the old vger appender seemed to only work on the raw string, # so in multipart (e.g. GPG-signed) messages, the list trailer # becomes invisible to MIME-aware email clients. if ($s =~ s/$l0\n$l1\n$l2\n$l3\n(?:$l4\n)?\n*\z//os) { - $mime = PublicInbox::Eml->new(\$s); + $_[1] = $eml= PublicInbox::Eml->new(\$s); } - $self->ACCEPT($mime); + $self->ACCEPT($eml); } sub delivery { diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index b995a8ad..18412cc3 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -129,7 +129,6 @@ sub _each_maildir_eml { if ($self && (my $filter = $self->filter($im))) { my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $eml = $ret; } $im->add($eml); } @@ -153,7 +152,6 @@ sub _mbox_eml_cb { # MboxReader->mbox* callback if ($filter) { my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $eml = $ret; } $im->add($eml); } diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 5cde1d80..c270f587 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -176,30 +176,19 @@ sub remove_eml_i { # each_inbox callback eval { # try to avoid taking a lock or unnecessary spawning my $im = $self->{importers}->{"$ibx"}; - my $scrubbed; + my @eml = ($eml); + if (my $filter = $ibx->filter($im)) { + my $tmp = PublicInbox::Eml->new(\($eml->as_string)); + my $ret = $filter->scrub($tmp, 1); + push @eml, $tmp if $ret && $ret != REJECT; + } + if ((!$im || !$im->active) && $ibx->over) { - if (content_exists($ibx, $eml)) { - # continue - } elsif (my $scrub = $ibx->filter($im)) { - $scrubbed = $scrub->scrub($eml, 1); - if ($scrubbed && $scrubbed != REJECT && - !content_exists($ibx, $scrubbed)) { - return; - } - } else { - return; - } + @eml = grep { content_exists($ibx, $_) } @eml or return; } $im //= _importer_for($self, $ibx); # may spawn fast-import - $im->remove($eml, 'spam'); - $scrubbed //= do { - my $scrub = $ibx->filter($im); - $scrub ? $scrub->scrub($eml, 1) : undef; - }; - if ($scrubbed && $scrubbed != REJECT) { - $im->remove($scrubbed, 'spam'); - } + $im->remove($_, 'spam') for @eml; }; if ($@) { warn "error removing spam at: $loc from $ibx->{name}: $@\n"; @@ -232,10 +221,9 @@ sub import_eml ($$$) { } eval { my $im = _importer_for($self, $ibx); - if (my $scrub = $ibx->filter($im)) { - my $scrubbed = $scrub->scrub($eml) or return; - $scrubbed == REJECT and return; - $eml = $scrubbed; + if (my $filter = $ibx->filter($im)) { + my $ret = $filter->scrub($eml) or return; + $ret == REJECT and return; } $im->add($eml, $self->{spamcheck}); }; diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 74202912..9d68ea35 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -120,26 +120,21 @@ my @rejects; for my $ibx (@$dests) { mda_filter_adjust($ibx); my $filter = $ibx->filter; - my $mime = PublicInbox::Eml->new($str); - my $ret = $filter->delivery($mime); - if (ref($ret) && ($ret->isa('PublicInbox::Eml') || - $ret->isa('Email::MIME'))) { # filter altered message - $mime = $ret; - } elsif ($ret == PublicInbox::Filter::Base::IGNORE) { - next; # nothing, keep looping - } elsif ($ret == PublicInbox::Filter::Base::REJECT) { + my $eml = PublicInbox::Eml->new($str); + my $ret = $filter->delivery($eml) or next; # $ret == IGNORE + if ($ret == PublicInbox::Filter::Base::REJECT) { push @rejects, $filter->err; next; - } + } # else success - PublicInbox::MDA->set_list_headers($mime, $ibx); + PublicInbox::MDA->set_list_headers($eml, $ibx); my $im = $ibx->importer(0); - if (defined $im->add($mime)) { + if (defined $im->add($eml)) { # ->abort is idempotent, no emergency if a single # destination succeeds $emm->abort; } else { # v1-only - my $mid = $mime->header_raw('Message-ID'); + my $mid = $eml->header_raw('Message-ID'); # this message is similar to what ssoma-mda shows: print STDERR "CONFLICT: Message-ID: $mid exists\n"; } diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 618cfec4..381f58cd 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -38,16 +38,16 @@ PublicInbox::Eml::strip_from($data); my $n_purged = 0; foreach my $ibx (@ibxs) { - my $mime = PublicInbox::Eml->new($data); + my $eml = PublicInbox::Eml->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); - my $commits = $v2w->purge($mime) || []; + my $commits = $v2w->purge($eml) || []; - if (my $scrub = $ibx->filter($v2w)) { - my $scrubbed = $scrub->scrub($mime, 1); + if (my $filter = $ibx->filter($v2w)) { + my $ret = $filter->scrub($eml, 1); # destructive - if ($scrubbed && $scrubbed != REJECT()) { - my $scrub_commits = $v2w->purge($scrubbed); + if ($ret && $ret != REJECT) { + my $scrub_commits = $v2w->purge($eml); push @$commits, @$scrub_commits if $scrub_commits; } }