From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7202A1F4CC for ; Wed, 1 Jan 2025 23:11:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1735773113; bh=VkNtslFXA/YoDGC7SE+OQXVbxRWEmccXOmXAzP/Wj8U=; h=Date:From:To:Subject:References:In-Reply-To:From; b=ndcAdvBoU0pKk+xZP316oZz2fP06tfrdCGkuNBFCJs3mF2vEdWHQe0EFCnn5/rqSh O60J44J0smKgrXCi+WfckLRcBCPW+ka8M0We+RSQIsz/s53VcXrtrxzeCO7BOeHILO NzDLDOZweKCnljZZSc6cQ2F6TNXn7BZMgcbPCcEc= Date: Wed, 1 Jan 2025 23:11:53 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/2] watch: don't propagate header changes to other inboxes Message-ID: <20250101231153.M358584@dcvr> References: <20241230222846.2251518-1-e@80x24.org> <20241230222846.2251518-3-e@80x24.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20241230222846.2251518-3-e@80x24.org> List-Id: Message-IDs may be appended by v2 inboxes for messages with with conflicting Message-IDs or SpamAssassin (or another spam filter) can change headers of both v1 and v2 messages. So we rely on `local' to reach into Eml internals to localize ->header_set modifications and rely on modern Perl having copy-on-write scalars to minimize memory traffic in the common case. --- MANIFEST | 1 + lib/PublicInbox/Watch.pm | 4 ++ t/eml.t | 9 +++++ t/watch_v1_v2_mix_no_modify.t | 74 +++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+) create mode 100644 t/watch_v1_v2_mix_no_modify.t diff --git a/MANIFEST b/MANIFEST index b0b4f71c..30adab80 100644 --- a/MANIFEST +++ b/MANIFEST @@ -641,6 +641,7 @@ t/watch_maildir.t t/watch_maildir_v2.t t/watch_mh.t t/watch_multiple_headers.t +t/watch_v1_v2_mix_no_modify.t t/www_altid.t t/www_listing.t t/www_static.t diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index a37038b5..eb6eb85f 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -210,6 +210,10 @@ sub _remove_spam { sub import_eml ($$$) { my ($self, $ibx, $eml) = @_; + # v2 may add a new Message-ID header on conflicts w/ different content + # and SpamAssassin may alter headers. $copy is CoW in newer Perls. + local $eml->{hdr} = \(my $copy = ${$eml->{hdr}}); + # any header match means it's eligible for the inbox: if (my $watch_hdrs = $ibx->{-watchheaders}) { my $ok; diff --git a/t/eml.t b/t/eml.t index 690ada57..d63f7a3b 100644 --- a/t/eml.t +++ b/t/eml.t @@ -28,6 +28,15 @@ sub mime_load ($) { is($eml->as_string, "a: b\n\nhi\n", '->as_string'); my $empty = PublicInbox::Eml->new("\n\n"); is($empty->as_string, "\n\n", 'empty message'); + + # we use this pattern in -watch for writing an eml to multiple inboxes + my $in = do { # n.b. `my' must be used to assign a local var + local $eml->{hdr} = \(my $copy = ${$eml->{hdr}}); + $eml->header_set(qw(Message-ID )); + ${$eml->{hdr}}; + }; + like $in, qr/.*?/s, 'Message-ID set in copy'; + is $eml->header_raw('Message-ID'), undef, 'local $eml->{hdr} works'; } for my $cls (@classes) { diff --git a/t/watch_v1_v2_mix_no_modify.t b/t/watch_v1_v2_mix_no_modify.t new file mode 100644 index 00000000..759fd0bb --- /dev/null +++ b/t/watch_v1_v2_mix_no_modify.t @@ -0,0 +1,74 @@ +#!perl -w +# Copyright (C) all contributors +# License: AGPL-3.0+ +use v5.12; +use autodie; +use PublicInbox::TestCommon; +require_mods 'v2'; +use PublicInbox::InboxIdle; +use PublicInbox::DS qw(now); +use PublicInbox::IO qw(write_file); +use PublicInbox::Eml; +my $tmpdir = tmpdir; +local $ENV{HOME} = "$tmpdir"; +local $ENV{PI_CONFIG} = "$tmpdir/.public-inbox/config"; +my $msg = <<'EOM'; +From: a@example.com +To: v1@example.com, v2@example.com +Subject: test +Message-ID: + +hi +EOM + +for my $v (2, 1) { + run_script(['-init', "-V$v", "v$v", '-Lbasic', "$tmpdir/v$v", + "https://example.com/v$v", "v$v\@example.com" ]) or + xbail "init -V$v"; + write_file '>>', $ENV{PI_CONFIG}, "watch = maildir:$tmpdir/md$v"; + mkdir "$tmpdir/md$v/$_" for ('', qw(cur tmp new)); +} +my $orig = "$tmpdir/md2/cur/initial:2,S"; +write_file '>', $orig, $msg; + +my $delivered = 0; +my $cb = sub { + my ($ibx) = @_; + diag "message delivered to `$ibx->{name}'"; + $delivered++; +}; +my $ii = PublicInbox::InboxIdle->new(my $cfg = PublicInbox::Config->new); +my $obj = bless \$cb, 'PublicInbox::TestCommon::InboxWakeup'; +$cfg->each_inbox(sub { $_[0]->subscribe_unlock('ident', $obj) }); +my $exp = now + 10; +local @PublicInbox::DS::post_loop_do = (sub { $delivered < 1 || now > $exp}); + +my $rdr; +open $rdr->{2}, '>>', undef; +my $w = start_script(['-watch'], undef, $rdr); +diag 'waiting for -watch to import 2 existing messages..'; +PublicInbox::DS::add_timer 10, \&PublicInbox::Config::noop; +PublicInbox::DS::event_loop; +is $delivered, 1, 'delivered initial message'; + +$exp = now + 10; +PublicInbox::DS::add_timer 10, \&PublicInbox::Config::noop; +local @PublicInbox::DS::post_loop_do = (sub { $delivered < 3 || now > $exp }); +my $tmpmsg = "$tmpdir/md2/tmp/conflict:2,S"; +write_file '>', $tmpmsg, $msg, "hi\n"; +link $tmpmsg, "$tmpdir/md1/cur/conflict:2,S"; +rename $tmpmsg, "$tmpdir/md2/cur/conflict:2,S"; +PublicInbox::DS::event_loop; +is $delivered, 3, 'delivered new conflicting message to v2 and new to v1'; + +my $v1ibx = $cfg->lookup('v1@example.com'); +my $v1msgs = $v1ibx->over->recent; +is scalar(@$v1msgs), 1, 'only one message in v1'; +my $v1eml = PublicInbox::Eml->new($v1ibx->git->cat_file($v1msgs->[0]->{blob})); +my @mid = $v1eml->header_raw('Message-ID'); +is_deeply \@mid, [ qw[] ], 'only one Message-ID in v1 message'; + +my $v2msgs = $cfg->lookup('v2@example.com')->over->recent; +is scalar(@$v2msgs), 2, 'both messages in v2'; + +done_testing;