unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/5] more MH-related updates
@ 2024-01-31 10:20 Eric Wong
  2024-01-31 10:20 ` [PATCH 1/5] lei convert: explicitly allow --sort for inputs Eric Wong
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

MH can be used for slrnpull (and presumably many other NNTP) spools,
too.  I'm thinking 5/5 is a nice quality-of-life improvement
since I didn't want to document --sort=sequence in all the
updated scripts/import_*

Eric Wong (5):
  lei convert: explicitly allow --sort for inputs
  import: drop redundant `use' statement
  scripts/slrnspool2maildir: use MHreader and LeiToMail
  scripts/import_*: update usage to include lei tips
  lei: sort MH inputs sequentially by default

 lib/PublicInbox/Import.pm     |  1 -
 lib/PublicInbox/LeiConvert.pm |  1 +
 lib/PublicInbox/LeiInput.pm   |  2 +-
 lib/PublicInbox/LeiToMail.pm  |  2 +
 lib/PublicInbox/MHreader.pm   |  3 +-
 scripts/import_maildir        | 20 +++++---
 scripts/import_slrnspool      | 26 ++++++----
 scripts/import_vger_from_mbox |  6 +--
 scripts/slrnspool2maildir     | 90 ++++++++++++++++++-----------------
 t/mh_reader.t                 | 15 +++++-
 10 files changed, 102 insertions(+), 64 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/5] lei convert: explicitly allow --sort for inputs
  2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
@ 2024-01-31 10:20 ` Eric Wong
  2024-01-31 10:20 ` [PATCH 2/5] import: drop redundant `use' statement Eric Wong
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

LeiToMail can't sort v2 output, but sorting MH input (and
NNTP spool + mlmmj archives) numerically makes sense.
---
 lib/PublicInbox/LeiConvert.pm | 1 +
 lib/PublicInbox/LeiToMail.pm  | 2 ++
 t/mh_reader.t                 | 9 ++++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 17a952f2..4d4fceb2 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -52,6 +52,7 @@ sub lei_convert { # the main "lei convert" method
 	my ($lei, @inputs) = @_;
 	$lei->{opt}->{kw} //= 1;
 	$lei->{opt}->{dedupe} //= 'none';
+	$lei->{input_opt}->{sort} = 1; # for LeiToMail conflict check
 	my $self = bless {}, __PACKAGE__;
 	my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
 	$lei->{l2m} or return
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 9197bb44..a816df6c 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -451,6 +451,8 @@ EOM
 		(-d $dst || (-e _ && !-w _)) and die
 			"$dst exists and is not a writable file\n";
 	}
+	$lei->{input_opt} and # lei_convert sets this
+		@conflict = grep { !$lei->{input_opt}->{$_} } @conflict;
 	my @err = map { defined($lei->{opt}->{$_}) ? "--$_" : () } @conflict;
 	die "@err incompatible with $fmt\n" if @err;
 	$self->{dst} = $dst;
diff --git a/t/mh_reader.t b/t/mh_reader.t
index e8f69fa8..711fc8aa 100644
--- a/t/mh_reader.t
+++ b/t/mh_reader.t
@@ -101,7 +101,14 @@ test_lei(sub {
 	lei_ok qw(index), 'mh:'.$stale;
 	lei qw(q -f mboxrd), 's:msg 4';
 	like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
-		"message retrieved after `lei index'"
+		"message retrieved after `lei index'";
+
+	# ensure sort works for _input_ when output disallows sort
+	my $v2out = "$ENV{HOME}/v2-out";
+	lei_ok qw(convert -s sequence), "mh:$for_sort", '-o', "v2:$v2out";
+	my $git = PublicInbox::Git->new("$v2out/git/0.git");
+	chomp(my @l = $git->qx(qw(log --pretty=oneline --format=%s)));
+	is_xdeeply \@l, [1, 22, 333], 'sequence order preserved for v2';
 });
 
 done_testing;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/5] import: drop redundant `use' statement
  2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
  2024-01-31 10:20 ` [PATCH 1/5] lei convert: explicitly allow --sort for inputs Eric Wong
@ 2024-01-31 10:20 ` Eric Wong
  2024-01-31 10:20 ` [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail Eric Wong
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

We don't need multiple `use PublicInbox::IO' statements to
import a subroutine.
---
 lib/PublicInbox/Import.pm | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index e4f8615e..51ddfa7f 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -16,7 +16,6 @@ use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::MDA;
 use PublicInbox::Eml;
-use PublicInbox::IO;
 use POSIX qw(strftime);
 use autodie qw(socketpair);
 use Carp qw(croak);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail
  2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
  2024-01-31 10:20 ` [PATCH 1/5] lei convert: explicitly allow --sort for inputs Eric Wong
  2024-01-31 10:20 ` [PATCH 2/5] import: drop redundant `use' statement Eric Wong
@ 2024-01-31 10:20 ` Eric Wong
  2024-01-31 10:20 ` [PATCH 4/5] scripts/import_*: update usage to include lei tips Eric Wong
  2024-01-31 10:20 ` [PATCH 5/5] lei: sort MH inputs sequentially by default Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

This contains gmane-specific header munging to unmunge the
things gmane does to headers.  While we're at it, document the
generic `lei convert' invocation for users who don't need the
gmane-specific header munging.
---
 scripts/slrnspool2maildir | 90 ++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/scripts/slrnspool2maildir b/scripts/slrnspool2maildir
index 8e2ba08a..ba0729ec 100755
--- a/scripts/slrnspool2maildir
+++ b/scripts/slrnspool2maildir
@@ -1,51 +1,55 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# One-off script to convert an slrnpull news spool to Maildir
 =begin usage
+One-off script to convert an slrnpull spool from gmane to Maildir
+Note: this contains Gmane-specific header munging to workaround
+the munging done by Gmane.
+
 	./slrnspool2maildir SLRNPULL_ROOT/news/foo/bar /path/to/maildir/
-=cut
-use strict;
-use warnings;
-use Email::Filter;
-use Email::LocalDelivery;
-use File::Glob qw(bsd_glob GLOB_NOSORT);
-sub usage { "Usage:\n".join('',grep(/\t/, `head -n 12 $0`)) }
-my $spool = shift @ARGV or die usage();
-my $dir = shift @ARGV or die usage();
--d $dir or die "$dir is not a directory\n";
-$dir .= '/' unless $dir =~ m!/\z!;
-foreach my $sub (qw(cur new tmp)) {
-	my $nd = "$dir/$sub";
-	-d $nd and next;
-	mkdir $nd or die "mkdir $nd failed: $!\n";
-}
 
-foreach my $n (grep(/\d+\z/, bsd_glob("$spool/*", GLOB_NOSORT))) {
-	if (open my $fh, '<', $n) {
-		my $f = Email::Filter->new(data => do { local $/; <$fh> });
-		my $s = $f->simple;
+A generic replacement w/o Gmane-specific munging could treat
+the slrnpull spool as an MH folder with lei:
 
-		# gmane rewrites Received headers, which increases spamminess
-		# Some older archives set Original-To
-		foreach my $x (qw(Received To)) {
-			my @h = $s->header("Original-$x");
-			if (@h) {
-				$s->header_set($x, @h);
-				$s->header_set("Original-$x");
-			}
+	lei convert mh:SLRNPULL_ROOT/news/foo/bar -o /path/to/maildir
+	# (and `lei daemon-kill' if you don't want the daemon to linger)
+=cut
+use v5.12;
+use autodie;
+# warning: unstable internal APIs:
+use PublicInbox::Eml;
+use PublicInbox::LeiToMail;
+use PublicInbox::MHreader;
+use PublicInbox::IO qw(read_all);
+use File::Path qw(make_path);
+use File::Spec ();
+sub usage {
+	open my $fh, '<', __FILE__;
+	("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
+my $spool = shift @ARGV or die usage();
+my $dst = shift @ARGV or die usage();
+$dst .= '/' unless $dst =~ m!/\z!;
+File::Path::make_path(map { $dst.$_ } qw(tmp new cur));
+$dst = File::Spec->rel2abs($dst).'/';
+opendir my $cwdfh, '.';
+my $mhr = PublicInbox::MHreader->new($spool, $cwdfh);
+my $smsg;
+$mhr->mh_each_eml(sub {
+	my ($d, $n, $kw, $eml) = @_;
+	# gmane rewrites Received headers, which increases spamminess
+	# Some older archives set Original-To
+	for my $x (qw(Received To)) {
+		my @h = $eml->header_raw("Original-$x");
+		if (@h) {
+			$eml->header_set($x, @h);
+			$eml->header_set("Original-$x");
 		}
-
-		# triggers for the SA HEADER_SPAM rule
-		foreach my $drop (qw(Approved)) { $s->header_set($drop) }
-
-		# appears to be an old gmane bug:
-		$s->header_set('connect()');
-
-		$f->exit(0);
-		$f->accept($dir);
-	} else {
-		warn "Failed to open $n: $!\n";
 	}
-}
+	# `Approved' triggers the SA HEADER_SPAM rule
+	# `connect()' appears to be an old gmane bug:
+	$eml->header_set($_) for ('Approved', 'connect()');
+	my $buf = $eml->as_string;
+	$smsg->{blob} = $n;
+	PublicInbox::LeiToMail::_buf2maildir($dst, \$buf, $smsg, 'new/');
+});

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/5] scripts/import_*: update usage to include lei tips
  2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
                   ` (2 preceding siblings ...)
  2024-01-31 10:20 ` [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail Eric Wong
@ 2024-01-31 10:20 ` Eric Wong
  2024-01-31 10:20 ` [PATCH 5/5] lei: sort MH inputs sequentially by default Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

These scripts probably don't offer anything useful now that
lei has fleshed out read-only MH support and v2 outputs.
---
 scripts/import_maildir        | 20 ++++++++++++++------
 scripts/import_slrnspool      | 26 ++++++++++++++++++--------
 scripts/import_vger_from_mbox |  6 +++---
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/scripts/import_maildir b/scripts/import_maildir
index 269f2550..7228a3ad 100755
--- a/scripts/import_maildir
+++ b/scripts/import_maildir
@@ -1,21 +1,29 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2014, Eric Wong <e@80x24.org> and all contributors
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# Script to import a Maildir into a public-inbox
 =begin usage
+Ancient script to import a Maildir into a v1 public-inbox
+
+	# this is only if you want a v1 inbox
 	export GIT_DIR=/path/to/your/repo.git
 	export GIT_AUTHOR_EMAIL='list@example.com'
 	export GIT_AUTHOR_NAME='list name'
 	./import_maildir /path/to/maildir/
+
+For v2 (strongly recommended), use:
+
+	lei convert /path/to/maildir -o /path/to/v2-inbox
+	# (and `lei daemon-kill' if you don't want the daemon to linger)
 =cut
-use strict;
-use warnings;
+use v5.12;
 use Date::Parse qw/str2time/;
 use PublicInbox::Eml;
 use PublicInbox::Git;
 use PublicInbox::Import;
-sub usage { "Usage:\n".join('', grep(/\t/, `head -n 24 $0`)) }
+sub usage {
+	open my $fh, '<', __FILE__;
+	("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
 my $dir = shift @ARGV or die usage();
 my $git_dir = `git rev-parse --git-dir`;
 chomp $git_dir;
diff --git a/scripts/import_slrnspool b/scripts/import_slrnspool
index d9a35dfd..81df6c2e 100755
--- a/scripts/import_slrnspool
+++ b/scripts/import_slrnspool
@@ -1,20 +1,30 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-#
-# Incremental (or one-shot) importer of a slrnpull news spool
 =begin usage
+Incremental (or one-shot) importer of a slrnpull news spool.
+
+Since the news spool can appear as an MH folder, you may also use
+lei from public-inbox 2.0+ to convert it:
+
+	lei convert mh:$SLRNPULL_ROOT/news/foo/bar -o v2:/path/to/inbox/
+	# (and `lei daemon-kill' if you don't want the daemon to linger)
+
+But if you want to use this script:
+
 	export ORIGINAL_RECIPIENT=address@example.com
-	public-inbox-init $INBOX $GIT_DIR $HTTP_URL $ORIGINAL_RECIPIENT
-	./import_slrnspool SLRNPULL_ROOT/news/foo/bar
+	public-inbox-init -V2 $INBOX $INBOX_DIR $HTTP_URL $ORIGINAL_RECIPIENT
+	./import_slrnspool $SLRNPULL_ROOT/news/foo/bar
 =cut
-use strict;
-use warnings;
+use v5.12;
 use PublicInbox::Config;
 use PublicInbox::Eml;
 use PublicInbox::Import;
 use PublicInbox::Git;
-sub usage { "Usage:\n".join('',grep(/\t/, `head -n 10 $0`)) }
+sub usage {
+	open my $fh, '<', __FILE__;
+	("Usage:\n", grep { /^=begin usage/../^=cut/ and !/^=/m } <$fh>);
+}
 my $exit = 0;
 my $sighandler = sub { $exit = 1 };
 $SIG{INT} = $sighandler;
diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox
index c33e42e4..40ccf50b 100644
--- a/scripts/import_vger_from_mbox
+++ b/scripts/import_vger_from_mbox
@@ -1,8 +1,8 @@
 #!/usr/bin/perl -w
-# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict;
-use warnings;
+# consider `lei convert' instead since it handles more formats
+use v5.12;
 use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
 use PublicInbox::InboxWritable;
 my $usage = "usage: $0 NAME EMAIL DIR <MBOX\n";

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/5] lei: sort MH inputs sequentially by default
  2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
                   ` (3 preceding siblings ...)
  2024-01-31 10:20 ` [PATCH 4/5] scripts/import_*: update usage to include lei tips Eric Wong
@ 2024-01-31 10:20 ` Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2024-01-31 10:20 UTC (permalink / raw)
  To: meta

MH sequence numbers can be analogous to IMAP UIDs and NNTP
article numbers (or more like IMAP MSNs with clients which
pack).  In any case, sort them numerically by default to avoid
surprising users who treat NNTP spools and mlmmj archives as MH
folders.  This gives more coherent git history and resulting
NNTP/IMAP numbering when round-tripping MH -> v2 -> (NNTP|IMAP) -> MH
---
 lib/PublicInbox/LeiInput.pm |  2 +-
 lib/PublicInbox/MHreader.pm |  3 ++-
 t/mh_reader.t               | 14 ++++++++++----
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 947a7a79..d003d983 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -242,7 +242,7 @@ sub input_path_url {
 		}
 	} elsif (-d _ && $ifmt eq 'mh') {
 		my $mhr = PublicInbox::MHreader->new($input.'/', $lei->{3});
-		$mhr->{sort} = $lei->{opt}->{sort};
+		$mhr->{sort} = $lei->{opt}->{sort} // [ 'sequence'];
 		$mhr->mh_each_eml($self->can('input_mh_cb'), $self, @args);
 	} elsif (-d _ && $ifmt =~ /\A(?:v1|v2)\z/) {
 		my $ibx = PublicInbox::Inbox->new({inboxdir => $input});
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
index 033aa740..3e7bbd5c 100644
--- a/lib/PublicInbox/MHreader.pm
+++ b/lib/PublicInbox/MHreader.pm
@@ -54,7 +54,8 @@ sub mh_each_file {
 	opendir(my $dh, my $dir = $self->{dir});
 	my $restore = PublicInbox::OnDestroy->new($$, \&chdir, $self->{cwdfh});
 	chdir($dh);
-	if (defined(my $sort = $self->{sort})) {
+	my $sort = $self->{sort};
+	if (defined $sort && "@$sort" ne 'none') {
 		my @sort = map {
 			my @tmp = $_ eq '' ? ('sequence') : split(/[, ]/);
 			# sorting by name alphabetically makes no sense for MH:
diff --git a/t/mh_reader.t b/t/mh_reader.t
index 711fc8aa..c81df32e 100644
--- a/t/mh_reader.t
+++ b/t/mh_reader.t
@@ -7,6 +7,7 @@ use PublicInbox::IO qw(write_file);
 use PublicInbox::Lock;
 use PublicInbox::OnDestroy;
 use PublicInbox::Eml;
+use File::Path qw(remove_tree);
 use autodie;
 opendir my $cwdfh, '.';
 
@@ -103,12 +104,17 @@ test_lei(sub {
 	like $lei_out, qr/^Subject: msg 4\nStatus: RO\n\n\n/ms,
 		"message retrieved after `lei index'";
 
+	lei_ok qw(convert -s none -f text), "mh:$for_sort", \'--sort=none';
+
 	# ensure sort works for _input_ when output disallows sort
 	my $v2out = "$ENV{HOME}/v2-out";
-	lei_ok qw(convert -s sequence), "mh:$for_sort", '-o', "v2:$v2out";
-	my $git = PublicInbox::Git->new("$v2out/git/0.git");
-	chomp(my @l = $git->qx(qw(log --pretty=oneline --format=%s)));
-	is_xdeeply \@l, [1, 22, 333], 'sequence order preserved for v2';
+	for my $sort (['--sort=sequence'], []) { # sequence is the default
+		lei_ok qw(convert), @$sort, "mh:$for_sort", '-o', "v2:$v2out";
+		my $g = PublicInbox::Git->new("$v2out/git/0.git");
+		chomp(my @l = $g->qx(qw(log --pretty=oneline --format=%s)));
+		is_xdeeply \@l, [1, 22, 333], 'sequence order preserved for v2';
+		File::Path::remove_tree $v2out;
+	}
 });
 
 done_testing;

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-01-31 10:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-31 10:20 [PATCH 0/5] more MH-related updates Eric Wong
2024-01-31 10:20 ` [PATCH 1/5] lei convert: explicitly allow --sort for inputs Eric Wong
2024-01-31 10:20 ` [PATCH 2/5] import: drop redundant `use' statement Eric Wong
2024-01-31 10:20 ` [PATCH 3/5] scripts/slrnspool2maildir: use MHreader and LeiToMail Eric Wong
2024-01-31 10:20 ` [PATCH 4/5] scripts/import_*: update usage to include lei tips Eric Wong
2024-01-31 10:20 ` [PATCH 5/5] lei: sort MH inputs sequentially by default Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).