From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <e@80x24.org> X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B5C6F1F4C1 for <meta@public-inbox.org>; Wed, 31 Jan 2024 10:20:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1706696421; bh=wQkkOxYtt3YokPE1m4lzaaPG+/b4XamGUGN3rLeINiU=; h=From:To:Subject:Date:In-Reply-To:References:From; b=RmYfa19Z5dogEKmbLvJn+Xe3bWarmHcMr+N/nFZK/XN36S9kSX9HTaB8UF94tkk9l cpobv3rmJFEi/Pf/P4w532uufQnJakYTZr3t/kx46mUgsp+G/5x48D3R7bfn/uO3Hk ORHvK+GQYG3HZVJFMfXHDL+kLoBgz+Dii3/jax4s= From: Eric Wong <e@80x24.org> To: meta@public-inbox.org Subject: [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail Date: Wed, 31 Jan 2024 10:20:18 +0000 Message-ID: <20240131102021.1257902-4-e@80x24.org> In-Reply-To: <20240131102021.1257902-1-e@80x24.org> References: <20240131102021.1257902-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: <meta.public-inbox.org> This contains gmane-specific header munging to unmunge the things gmane does to headers. While we're at it, document the generic `lei convert' invocation for users who don't need the gmane-specific header munging. 
--- scripts/slrnspool2maildir | 90 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir index 8e2ba08a..ba0729ec 100755 --- a/scripts/slrnspool2maildir +++ b/scripts/slrnspool2maildir @@ -1,51 +1,55 @@ #!/usr/bin/perl -w -# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# -# One-off script to convert an slrnpull news spool to Maildir =begin usage +One-off script to convert an slrnpull spool from gmane to Maildir +Note: this contains Gmane-specific header munging to workaround +the munging done by Gmane. + ./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/ -=cut -use strict; -use warnings; -use Email::Filter; -use Email::LocalDelivery; -use File::Glob qw(bsd_glob GLOB_NOSORT); -sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) } -my $spool = shift @ARGV or die usage(); -my $dir = shift @ARGV or die usage(); --d $dir or die "$dir is not a directory\n"; -$dir .= '/' unless $dir =~ m!/\z!; -foreach my $sub (qw(cur new tmp)) { - my $nd = "$dir/$sub"; - -d $nd and next; - mkdir $nd or die "mkdir $nd failed: $!\n"; -} -foreach my $n (grep(/\d+\z/, bsd_glob("$spool/*", GLOB_NOSORT))) { - if (open my $fh, '<', $n) { - my $f = Email::Filter->new(data => do { local $/; <$fh> }); - my $s = $f->simple; +A generic replacement w/o Gmane-specific munging could treat +the slrnpull spool as an MH folder with lei: - # gmane rewrites Received headers, which increases spamminess - # Some older archives set Original-To - foreach my $x (qw(Received To)) { - my @h = $s->header("Original-$x"); - if (@h) { - $s->header_set($x, @h); - $s->header_set("Original-$x"); - } + lei convert mh:SLRNPULL_ROOT/news/foo/bar -o /path/to/maildir + # (and `lei daemon-kill' if you don't want the daemon to linger) +=cut +use v5.12; +use 
autodie; +# warning: unstable internal APIs: +use PublicInbox::Eml; +use PublicInbox::LeiToMail; +use PublicInbox::MHreader; +use PublicInbox::IO qw(read_all); +use File::Path qw(make_path); +use File::Spec (); +sub usage { + open my $fh, '<', __FILE__; + ("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>); +} +my $spool = shift @ARGV or die usage(); +my $dst = shift @ARGV or die usage(); +$dst .= '/' unless $dst =~ m!/\z!; +File::Path::make_path(map { $dst.$_ } qw(tmp new cur)); +$dst = File::Spec->rel2abs($dst).'/'; +opendir my $cwdfh, '.'; +my $mhr = PublicInbox::MHreader->new($spool, $cwdfh); +my $smsg; +$mhr->mh_each_eml(sub { + my ($d, $n, $kw, $eml) = @_; + # gmane rewrites Received headers, which increases spamminess + # Some older archives set Original-To + for my $x (qw(Received To)) { + my @h = $eml->header_raw("Original-$x"); + if (@h) { + $eml->header_set($x, @h); + $eml->header_set("Original-$x"); } - - # triggers for the SA HEADER_SPAM rule - foreach my $drop (qw(Approved)) { $s->header_set($drop) } - - # appears to be an old gmane bug: - $s->header_set('connect()'); - - $f->exit(0); - $f->accept($dir); - } else { - warn "Failed to open $n: $!\n"; } -} + # `Approved' triggers the SA HEADER_SPAM rule + # `connect()' appears to be an old gmane bug: + $eml->header_set($_) for ('Approved', 'connect()'); + my $buf = $eml->as_string; + $smsg->{blob} = $n; + PublicInbox::LeiToMail::_buf2maildir($dst, \$buf, $smsg, 'new/'); +});