From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <e@80x24.org>
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN:  
X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00,
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,
	T_SCC_BODY_TEXT_LINE,URIBL_SBL_A shortcircuit=no autolearn=ham
	autolearn_force=no version=3.4.6
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id DE36B1F461
	for <meta@public-inbox.org>; Sat, 11 Nov 2023 09:04:59 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org;
	s=selector1; t=1699693500;
	bh=iUy6oevFoqPuD/SCo67nCrSwAxzqIRHY6iNmzpaO2I0=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=fo6HRCsbjXXHfZDIt1V63oNQEWkGG5ekG+/iBRDiRatf9F/t5pekS73PgIKv/ybXL
	 nLodTDmhR1ewFOItNs1C/uVRKhyShp6rB0l1nHsWwZzr9G5YiIohBuwzLVkNof5nXp
	 n9SbrMihF2sCo+60LI/CrTmgYrpjLly3s2SwBHHg=
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/4] mda|learn|watch: support dropUniqueUnsubscribe config
Date: Sat, 11 Nov 2023 09:04:57 +0000
Message-Id: <20231111090459.605791-3-e@80x24.org>
In-Reply-To: <20231111090459.605791-1-e@80x24.org>
References: <20231111090459.605791-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: <meta.public-inbox.org>

List-Unsubscribe headers with unique identifiers (such as those
generated by our examples/unsubscribe.milter) should not
end up in public archives.  Add a new config knob to strip
List-Unsubscribe headers if they have the
`List-Unsubscribe-Post: List-Unsubscribe=One-Click'
header.

Unfortunately, this breaks DKIM signatures if the signature
covers either of these List-Unsubscribe* headers.  However,
breaking DKIM is the lesser evil compared to any archive reader
being able to stop archival by an independent archivist.

As much as I would like this to be the default, it probably
affects few users at the moment since very few mailing lists
use unique identifiers in List-Unsubscribe (but that number
has grown, recently).
---
 Documentation/public-inbox-config.pod | 17 ++++++++++
 Documentation/public-inbox-learn.pod  | 19 +++++++++++
 Documentation/public-inbox-mda.pod    | 18 +++++++++-
 Documentation/public-inbox-watch.pod  |  6 +++-
 lib/PublicInbox/Import.pm             | 27 +++++++++++++++
 lib/PublicInbox/LeiToMail.pm          |  6 ++++
 lib/PublicInbox/Watch.pm              |  1 +
 script/public-inbox-learn             |  3 ++
 script/public-inbox-mda               |  4 +++
 script/public-inbox-watch             |  2 ++
 t/lei-import.t                        | 48 ++++++++++++++++++++++++++-
 t/mda.t                               | 41 ++++++++++++++++++++---
 t/watch_maildir.t                     | 30 +++++++++++++++--
 13 files changed, 212 insertions(+), 10 deletions(-)

diff --git a/Documentation/public-inbox-config.pod b/Documentation/public-inbox-config.pod
index 871ac6c5..1ef7f46f 100644
--- a/Documentation/public-inbox-config.pod
+++ b/Documentation/public-inbox-config.pod
@@ -196,6 +196,23 @@ and the path may be "/dev/null" or any empty file.
 Multiple files may be specified and will be included in the
 order specified.
 
+=item publicinboxImport.dropUniqueUnsubscribe
+
+Drop C<List-Unsubscribe> headers if the message also includes
+the C<List-Unsubscribe-Post: List-Unsubscribe=One-Click> header
+to signal MUAs to support an instantaneous unsubscribe.  This
+is strongly recommended for users creating their own public
+archives of mailing lists they subscribe to, otherwise any
+archive reader can unsubscribe the archivist.
+
+This may break DKIM signatures if the C<List-Unsubscribe*>
+headers are signed, but breaking DKIM signatures is the
+lesser evil compared to allowing any reader to unsubscribe
+the archivist.
+
+This affects L<public-inbox-mda(1)>, L<public-inbox-watch(1)>,
+and L<public-inbox-learn(1)>.
+
 =item publicinboxmda.spamcheck
 
 This may be set to C<none> to disable the use of SpamAssassin
diff --git a/Documentation/public-inbox-learn.pod b/Documentation/public-inbox-learn.pod
index f776df6b..b08e4bc8 100644
--- a/Documentation/public-inbox-learn.pod
+++ b/Documentation/public-inbox-learn.pod
@@ -73,6 +73,25 @@ Default: ~/.public-inbox/config
 
 =back
 
+=head1 CONFIGURATION
+
+These configuration knobs should be used in the
+L<public-inbox-config(5)> file.
+
+=over 8
+
+=item publicinboxImport.dropUniqueUnsubscribe
+
+=item publicinbox.<name>.address
+
+=item publicinbox.<name>.listid
+
+=item publicinboxmda.spamcheck
+
+See L<public-inbox-config(5)> for descriptions of these options.
+
+=back
+
 =head1 CONTACT
 
 Feedback welcome via plain-text mail to L<mailto:meta@public-inbox.org>
diff --git a/Documentation/public-inbox-mda.pod b/Documentation/public-inbox-mda.pod
index 93cb0e9c..edc90287 100644
--- a/Documentation/public-inbox-mda.pod
+++ b/Documentation/public-inbox-mda.pod
@@ -68,6 +68,22 @@ Default: ~/.public-inbox/emergency/
 
 =back
 
+=head1 CONFIGURATION
+
+Various configuration knobs should be used in the
+L<public-inbox-config(5)> file.
+
+=over 8
+
+=item publicinboxImport.dropUniqueUnsubscribe
+
+=item publicinbox.<name>.address
+
+=item publicinbox.<name>.listid
+
+See L<public-inbox-config(5)> for descriptions of these options.
+
+=back
 
 =head1 CONTACT
 
@@ -78,7 +94,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright 2013-2021 all contributors L<mailto:meta@public-inbox.org>
+Copyright all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
 
diff --git a/Documentation/public-inbox-watch.pod b/Documentation/public-inbox-watch.pod
index febda0b1..7c21f7ce 100644
--- a/Documentation/public-inbox-watch.pod
+++ b/Documentation/public-inbox-watch.pod
@@ -66,6 +66,10 @@ L<public-inbox-config(5)> file.
 
 =over 8
 
+=item publicinboxImport.dropUniqueUnsubscribe
+
+See L<public-inbox-config(5)/publicinboxImport.dropUniqueUnsubscribe>
+
 =item publicinbox.<name>.watch
 
 A location to watch.  public-inbox 1.5.0 and earlier only supported
@@ -201,7 +205,7 @@ L<http://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/meta/>
 
 =head1 COPYRIGHT
 
-Copyright 2016-2021 all contributors L<mailto:meta@public-inbox.org>
+Copyright all contributors L<mailto:meta@public-inbox.org>
 
 License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt>
 
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 2d60db55..e4f8615e 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -321,11 +321,38 @@ sub extract_cmt_info ($;$) {
 # kill potentially confusing/misleading headers
 our @UNWANTED_HEADERS = (qw(Bytes Lines Content-Length),
 			qw(Status X-Status));
+our $DROP_UNIQUE_UNSUB;
 sub drop_unwanted_headers ($) {
 	my ($eml) = @_;
 	for (@UNWANTED_HEADERS, @PublicInbox::MDA::BAD_HEADERS) {
 		$eml->header_set($_);
 	}
+
+	# We don't want public-inbox readers to be able to unsubscribe the
+	# address which does archiving.  WARNING: this breaks DKIM if the
+	# mailing list sender follows RFC 8058, section 4; but breaking DKIM
+	# (or having senders ignore RFC 8058 sec. 4) is preferable to having
+	# saboteurs unsubscribing independent archivists:
+	if ($DROP_UNIQUE_UNSUB && grep(/\AList-Unsubscribe=One-Click\z/,
+				$eml->header_raw('List-Unsubscribe-Post'))) {
+		for (qw(List-Unsubscribe-Post List-Unsubscribe)) {
+			$eml->header_set($_)
+		}
+	}
+}
+
+sub load_config ($;$) {
+	my ($cfg, $do_exit) = @_;
+	my $v = $cfg->{lc 'publicinboxImport.dropUniqueUnsubscribe'};
+	if (defined $v) {
+		$DROP_UNIQUE_UNSUB = $cfg->git_bool($v) // do {
+			warn <<EOM;
+E: publicinboxImport.dropUniqueUnsubscribe=$v in $cfg->{-f} is not boolean
+EOM
+			$do_exit //= \&CORE::exit; # no callback given: exit
+			$do_exit->(78); # EX_CONFIG (sysexits.h)
+		};
+	}
 }
 
 # used by V2Writable, too
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index b73af68a..0d2f586a 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -10,6 +10,7 @@ use PublicInbox::Eml;
 use PublicInbox::IO;
 use PublicInbox::Git;
 use PublicInbox::Spawn qw(spawn);
+use PublicInbox::Import;
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
 use PublicInbox::Syscall qw(rename_noreplace);
@@ -672,6 +673,11 @@ sub _pre_augment_v2 {
 		});
 	}
 	PublicInbox::InboxWritable->new($ibx, @creat);
+	local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # only for workers
+	PublicInbox::Import::load_config(PublicInbox::Config->new, sub {
+		$lei->x_it(shift);
+		die "E: can't write v2 inbox with broken config\n";
+	});
 	$ibx->init_inbox if @creat;
 	my $v2w = $ibx->importer;
 	$v2w->wq_workers_start("lei/v2w $dir", 1, $lei->oldset, {lei => $lei},
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 1cdf12a5..5253ec94 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -45,6 +45,7 @@ sub new {
 	my (%mdmap);
 	my (%imap, %nntp); # url => [inbox objects] or 'watchspam'
 	my (@imap, @nntp);
+	PublicInbox::Import::load_config($cfg);
 
 	# "publicinboxwatch" is the documented namespace
 	# "publicinboxlearn" is legacy but may be supported
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 8069d919..6a1bc890 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -28,6 +28,7 @@ use PublicInbox::Spamcheck::Spamc;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 my %opt = (all => 0);
 GetOptions(\%opt, qw(all help|h)) or die $help;
+use PublicInbox::Import;
 
 my $train = shift or die $help;
 if ($train !~ /\A(?:ham|spam|rm)\z/) {
@@ -37,6 +38,8 @@ die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm';
 
 my $spamc = PublicInbox::Spamcheck::Spamc->new;
 my $pi_cfg = PublicInbox::Config->new;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
+PublicInbox::Import::load_config($pi_cfg);
 my $err;
 my $mime = PublicInbox::Eml->new(do{
 	defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n";
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index cac819ac..04fd8aad 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -16,6 +16,8 @@ use strict;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 my ($ems, $emm, $show_help);
 my $precheck = 1;
+use PublicInbox::Import;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # does this need a CLI switch?
 GetOptions('precheck!' => \$precheck, 'help|h' => \$show_help) or
 	do { print STDERR $help; exit 1 };
 
@@ -47,6 +49,8 @@ my $key = 'publicinboxmda.spamcheck';
 my $default = 'PublicInbox::Spamcheck::Spamc';
 my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default);
 my $dests = [];
+PublicInbox::Import::load_config($cfg, $do_exit);
+
 my $recipient = $ENV{ORIGINAL_RECIPIENT};
 if (defined $recipient) {
 	my $ibx = $cfg->lookup($recipient); # first check
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
index d9215de9..9bcd42ed 100755
--- a/script/public-inbox-watch
+++ b/script/public-inbox-watch
@@ -11,6 +11,8 @@ use strict;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
 use IO::Handle; # ->autoflush
 use PublicInbox::Watch;
+use PublicInbox::Import;
+local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
 use PublicInbox::Config;
 use PublicInbox::DS;
 my $do_scan = 1;
diff --git a/t/lei-import.t b/t/lei-import.t
index 1edd607d..bd562617 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -3,7 +3,8 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use v5.12; use PublicInbox::TestCommon;
 use PublicInbox::DS qw(now);
-use autodie qw(open close);
+use PublicInbox::IO qw(write_file);
+use autodie qw(open close truncate);
 test_lei(sub {
 ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
 like($lei_err, qr/\bis `eml', not --in-format/, 'gave error message');
@@ -180,6 +181,51 @@ SKIP: {
 		'EIO noted in stderr');
 }
 
+{
+	local $ENV{PI_CONFIG} = "$ENV{HOME}/pi_config";
+	write_file '>', $ENV{PI_CONFIG}, <<EOM;
+[publicinboxImport]
+	dropUniqueUnsubscribe
+EOM
+	my $in = <<EOM;
+List-Unsubscribe: <https://example.com/some-UUID-here/test>
+List-Unsubscribe-Post: List-Unsubscribe=One-Click
+Message-ID: <unsubscribe-1\@example>
+Subject: unsubscribe-1 example
+From: u\@example.com
+To: 2\@example.com
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+EOM
+	lei_ok [qw(import -F eml +L:unsub)], undef, { %$lei_opt, 0 => \$in },
+		'import succeeds w/ List-Unsubscribe';
+	lei_ok qw(q L:unsub -f mboxrd);
+	like $lei_out, qr/some-UUID-here/,
+		'Unsubscribe header preserved despite PI_CONFIG dropping';
+	lei_ok qw(q L:unsub -o), "v2:$ENV{HOME}/v2-1";
+	lei_ok qw(q s:unsubscribe -f mboxrd --only), "$ENV{HOME}/v2-1";
+	unlike $lei_out, qr/some-UUID-here/,
+		'Unsubscribe header dropped w/ dropUniqueUnsubscribe';
+	like $lei_out, qr/Message-ID: <unsubscribe-1\@example>/,
+		'wrote expected message to v2 output';
+
+	# the default for compatibility:
+	truncate $ENV{PI_CONFIG}, 0;
+	lei_ok qw(q L:unsub -o), "v2:$ENV{HOME}/v2-2";
+	lei_ok qw(q s:unsubscribe -f mboxrd --only), "$ENV{HOME}/v2-2";
+	like $lei_out, qr/some-UUID-here/,
+		'Unsubscribe header preserved by default :<';
+
+	# ensure we can fail
+	write_file '>', $ENV{PI_CONFIG}, <<EOM;
+[publicinboxImport]
+	dropUniqueUnsubscribe = bogus
+EOM
+	ok(!lei(qw(q L:unsub -o), "v2:$ENV{HOME}/v2-3"), 'bad config fails');
+	like $lei_err, qr/is not boolean/, 'non-booleaness noted in stderr';
+	ok !-d "$ENV{HOME}/v2-3", 'v2 directory not created';
+}
+
 # see t/lei_to_mail.t for "import -F mbox*"
 });
 done_testing;
diff --git a/t/mda.t b/t/mda.t
index 83b0b33a..5144f3ca 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -8,6 +8,7 @@ use PublicInbox::Git;
 use PublicInbox::InboxWritable;
 use PublicInbox::TestCommon;
 use PublicInbox::Import;
+use PublicInbox::IO qw(write_file);
 use File::Path qw(remove_tree);
 my ($tmpdir, $for_destroy) = tmpdir();
 my $home = "$tmpdir/pi-home";
@@ -49,13 +50,11 @@ my $fail_bad_header = sub ($$$) {
 	is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox");
 	PublicInbox::Import::init_bare($maindir);
 
-	open my $fh, '>>', $pi_config or die;
-	print $fh <<EOF or die;
+	write_file '>>', $pi_config, <<EOF;
 [publicinbox "test"]
 	address = $addr
 	inboxdir = $maindir
 EOF
-	close $fh or die;
 }
 
 local $ENV{GIT_COMMITTER_NAME} = eval {
@@ -306,10 +305,44 @@ EOF
 	# ensure -learn rm works after inbox address is updated
 	($out, $err) = ('', '');
 	xsys(qw(git config --file), $pi_config, "$cfgpfx.address",
-		'updated-address@example.com');
+		$addr = 'updated-address@example.com');
 	ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn');
 	$cur = $git->qx(qw(diff HEAD~1..HEAD));
 	like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git');
+
+	# ensure we can strip List-Unsubscribe
+	$in = <<EOF;
+To: You <you\@example.com>
+List-Id: <$list_id>
+Message-ID: <unsubscribe-1\@example>
+Subject: unsubscribe-1
+From: user <user\@example.com>
+To: $addr
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+List-Unsubscribe: <https://example.com/some-UUID-here/listname>
+List-Unsubscribe-Post: List-Unsubscribe=One-Click
+
+List-Unsubscribe should be stripped
+EOF
+	write_file '>>', $pi_config, <<EOM;
+[publicinboxImport]
+	dropUniqueUnsubscribe
+EOM
+	$out = $err = '';
+	ok(run_script([qw(-mda)], undef, $rdr), 'mda w/ dropUniqueUnsubscribe');
+	$cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
+	like $cur, qr/Message-ID: <unsubscribe-1/, 'imported new message';
+	unlike $cur, qr/some-UUID-here/, 'List-Unsubscribe gone';
+	unlike $cur, qr/List-Unsubscribe-Post/i, 'List-Unsubscribe-Post gone';
+
+	$in =~ s/unsubscribe-1/unsubscribe-2/g or xbail 'BUG: s// fail';
+	ok(run_script([qw(-learn ham)], undef, $rdr),
+			'learn ham w/ dropUniqueUnsubscribe');
+	$cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
+	like $cur, qr/Message-ID: <unsubscribe-2/, 'learn ham';
+	unlike $cur, qr/some-UUID-here/, 'List-Unsubscribe gone on learn ham';
+	unlike $cur, qr/List-Unsubscribe-Post/i,
+		'List-Unsubscribe-Post gone on learn ham';
 }
 
 SKIP: {
diff --git a/t/watch_maildir.t b/t/watch_maildir.t
index 29e9bdc5..69a5e1f3 100644
--- a/t/watch_maildir.t
+++ b/t/watch_maildir.t
@@ -6,6 +6,7 @@ use PublicInbox::Eml;
 use Cwd;
 use PublicInbox::TestCommon;
 use PublicInbox::Import;
+use PublicInbox::IO qw(write_file);
 my ($tmpdir, $for_destroy) = tmpdir();
 my $git_dir = "$tmpdir/test.git";
 my $maildir = "$tmpdir/md";
@@ -143,6 +144,10 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	my $env = { PI_CONFIG => $cfg_path };
 	$git->cleanup;
 
+	write_file '>>', $cfg_path, <<EOM;
+[publicinboxImport]
+	dropUniqueUnsubscribe
+EOM
 	# n.b. --no-scan is only intended for testing atm
 	my $wm = start_script([qw(-watch --no-scan)], $env);
 	no_pollerfd($wm->{pid});
@@ -194,13 +199,32 @@ More majordomo info at  http://vger.kernel.org/majordomo-info.html\n);
 	$em->commit; # wake -watch up
 	diag 'waiting for -watch to import new message';
 	PublicInbox::DS::event_loop();
+
+	my $head = $git->qx(qw(cat-file commit HEAD));
+	my $subj = $eml->header('Subject');
+	like($head, qr/^\Q$subj\E/sm, 'new commit made');
+
+	# try dropUniqueUnsubscribe
+	$delivered = 0;
+	$eml->header_set('Message-ID', '<unsubscribe@example>');
+	$eml->header_set('List-Unsubscribe',
+			'<https://example.com/some-UUID-here/test');
+	$eml->header_set('List-Unsubscribe-Post', 'List-Unsubscribe=One-Click');
+	$em = PublicInbox::Emergency->new($maildir);
+	$em->prepare(\($eml->as_string));
+	$em->commit; # wake -watch up
+	diag 'waiting for -watch to import dropUniqueUnsubscribe message';
+	PublicInbox::DS::event_loop();
+	my $cur = $git->qx(qw(diff HEAD~1..HEAD));
+	like $cur, qr/Message-ID: <unsubscribe\@example>/,
+		'unsubscribe@example imported';
+	unlike $cur, qr/List-Unsubscribe\b/,
+		'List-Unsubscribe-* headers gone w/ dropUniqueUnsubscribe';
+
 	$wm->kill;
 	$wm->join;
 	$ii->close;
 	PublicInbox::DS->Reset;
-	my $head = $git->qx(qw(cat-file commit HEAD));
-	my $subj = $eml->header('Subject');
-	like($head, qr/^\Q$subj\E/sm, 'new commit made');
 }
 
 sub is_maildir {