From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <e@80x24.org>
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN:  
X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00,
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,
	T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no
	version=3.4.6
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id B5C6F1F4C1
	for <meta@public-inbox.org>; Wed, 31 Jan 2024 10:20:21 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org;
	s=selector1; t=1706696421;
	bh=wQkkOxYtt3YokPE1m4lzaaPG+/b4XamGUGN3rLeINiU=;
	h=From:To:Subject:Date:In-Reply-To:References:From;
	b=RmYfa19Z5dogEKmbLvJn+Xe3bWarmHcMr+N/nFZK/XN36S9kSX9HTaB8UF94tkk9l
	 cpobv3rmJFEi/Pf/P4w532uufQnJakYTZr3t/kx46mUgsp+G/5x48D3R7bfn/uO3Hk
	 ORHvK+GQYG3HZVJFMfXHDL+kLoBgz+Dii3/jax4s=
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail
Date: Wed, 31 Jan 2024 10:20:18 +0000
Message-ID: <20240131102021.1257902-4-e@80x24.org>
In-Reply-To: <20240131102021.1257902-1-e@80x24.org>
References: <20240131102021.1257902-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: <meta.public-inbox.org>

This contains gmane-specific header munging to unmunge the
things gmane does to headers.  While we're at it, document the
generic `lei convert' invocation for users who don't need the
gmane-specific header munging.
---
 scripts/slrnspool2maildir | 90 ++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
index 8e2ba08a..ba0729ec 100755
--- a/scripts/slrnspool2maildir
+++ b/scripts/slrnspool2maildir
@@ -1,51 +1,55 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# One-off script to convert an slrnpull news spool to Maildir
 =begin usage
+One-off script to convert an slrnpull spool from gmane to Maildir
+Note: this contains Gmane-specific header munging to work around
+the munging done by Gmane.
+
 	./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
-=cut
-use strict;
-use warnings;
-use Email::Filter;
-use Email::LocalDelivery;
-use File::Glob qw(bsd_glob GLOB_NOSORT);
-sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) }
-my $spool = shift @ARGV or die usage();
-my $dir = shift @ARGV or die usage();
--d $dir or die "$dir is not a directory\n";
-$dir .= '/' unless $dir =~ m!/\z!;
-foreach my $sub (qw(cur new tmp)) {
-	my $nd = "$dir/$sub";
-	-d $nd and next;
-	mkdir $nd or die "mkdir $nd failed: $!\n";
-}
 
-foreach my $n (grep(/\d+\z/, bsd_glob("$spool/*", GLOB_NOSORT))) {
-	if (open my $fh, '<', $n) {
-		my $f = Email::Filter->new(data => do { local $/; <$fh> });
-		my $s = $f->simple;
+A generic replacement w/o Gmane-specific munging could treat
+the slrnpull spool as an MH folder with lei:
 
-		# gmane rewrites Received headers, which increases spamminess
-		# Some older archives set Original-To
-		foreach my $x (qw(Received To)) {
-			my @h = $s->header("Original-$x");
-			if (@h) {
-				$s->header_set($x, @h);
-				$s->header_set("Original-$x");
-			}
+	lei convert mh:SLRNPULL_ROOT/news/foo/bar -o /path/to/maildir
+	# (and `lei daemon-kill' if you don't want the daemon to linger)
+=cut
+use v5.12;
+use autodie;
+# warning: unstable internal APIs:
+use PublicInbox::Eml;
+use PublicInbox::LeiToMail;
+use PublicInbox::MHreader;
+use PublicInbox::IO qw(read_all);
+use File::Path qw(make_path);
+use File::Spec ();
+sub usage {
+	open my $fh, '<', __FILE__;
+	("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
+my $spool = shift @ARGV or die usage();
+my $dst = shift @ARGV or die usage();
+$dst .= '/' unless $dst =~ m!/\z!;
+File::Path::make_path(map { $dst.$_ } qw(tmp new cur));
+$dst = File::Spec->rel2abs($dst).'/';
+opendir my $cwdfh, '.';
+my $mhr = PublicInbox::MHreader->new($spool, $cwdfh);
+my $smsg;
+$mhr->mh_each_eml(sub {
+	my ($d, $n, $kw, $eml) = @_;
+	# gmane rewrites Received headers, which increases spamminess
+	# Some older archives set Original-To
+	for my $x (qw(Received To)) {
+		my @h = $eml->header_raw("Original-$x");
+		if (@h) {
+			$eml->header_set($x, @h);
+			$eml->header_set("Original-$x");
 		}
-
-		# triggers for the SA HEADER_SPAM rule
-		foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-		# appears to be an old gmane bug:
-		$s->header_set('connect()');
-
-		$f->exit(0);
-		$f->accept($dir);
-	} else {
-		warn "Failed to open $n: $!\n";
 	}
-}
+	# `Approved' triggers the SA HEADER_SPAM rule
+	# `connect()' appears to be an old gmane bug:
+	$eml->header_set($_) for ('Approved', 'connect()');
+	my $buf = $eml->as_string;
+	$smsg->{blob} = $n;
+	PublicInbox::LeiToMail::_buf2maildir($dst, \$buf, $smsg, 'new/');
+});