unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/5] lei_to_mail fixes
@ 2021-04-05 10:27 Eric Wong
  2021-04-05 10:27 ` [PATCH 1/5] lei_to_mail: trim down imports Eric Wong
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

Some stuff I noticed while preparing for saved searches.

Eric Wong (5):
  lei_to_mail: trim down imports
  lei_tag: fix comments w.r.t support levels
  lei: maildir: move shard support to MdirReader
  lei_to_mail: improve comments and reduce LoC
  lei q: fix auth IMAP --output with remote mboxrd

 lib/PublicInbox/InboxWritable.pm |  4 +-
 lib/PublicInbox/LeiInput.pm      |  2 +-
 lib/PublicInbox/LeiOverview.pm   | 14 ++---
 lib/PublicInbox/LeiTag.pm        |  8 +--
 lib/PublicInbox/LeiToMail.pm     | 92 ++++++++++++--------------------
 lib/PublicInbox/LeiXSearch.pm    |  4 +-
 lib/PublicInbox/MdirReader.pm    | 25 +++++++--
 t/lei-convert.t                  |  2 +-
 t/lei_to_mail.t                  |  8 +--
 9 files changed, 73 insertions(+), 86 deletions(-)

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/5] lei_to_mail: trim down imports
  2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
@ 2021-04-05 10:27 ` Eric Wong
  2021-04-05 10:27 ` [PATCH 2/5] lei_tag: fix comments w.r.t support levels Eric Wong
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

We don't need to import so many things.  None of the Errno
constants are used in common code paths, so they are unlikely
to benefit from constant folding.
---
 lib/PublicInbox/LeiToMail.pm | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 0364d8ef..76a11b0e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -7,23 +7,15 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
 use PublicInbox::Eml;
-use PublicInbox::Lock;
 use PublicInbox::ProcessPipe;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::LeiDedupe;
-use PublicInbox::Git;
-use PublicInbox::GitAsyncCat;
 use PublicInbox::PktOp qw(pkt_do);
 use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
-use Errno qw(EEXIST ESPIPE ENOENT EPIPE);
 use Digest::SHA qw(sha256_hex);
 
-# struggles with short-lived repos, Gcf2Client makes little sense with lei;
-# but we may use in-process libgit2 in the future.
-$PublicInbox::GitAsyncCat::GCF2C = 0;
-
 my %kw2char = ( # Maildir characters
 	draft => 'D',
 	flagged => 'F',
@@ -76,7 +68,7 @@ sub atomic_append { # for on-disk destinations (O_APPEND, or O_EXCL)
 	if (defined(my $w = syswrite($lei->{1} // return, $$buf))) {
 		return if $w == length($$buf);
 		$buf = "short atomic write: $w != ".length($$buf);
-	} elsif ($! == EPIPE) {
+	} elsif ($!{EPIPE}) {
 		return $lei->note_sigpipe(1);
 	} else {
 		$buf = "atomic write: $!";
@@ -275,7 +267,7 @@ sub _buf2maildir {
 	do {
 		$tmp = $dst.'tmp/'.$rand.$common;
 	} while (!($ok = sysopen($fh, $tmp, O_CREAT|O_EXCL|O_WRONLY)) &&
-		$! == EEXIST && ($rand = _rand.','));
+		$!{EEXIST} && ($rand = _rand.','));
 	if ($ok && print $fh $$buf and close($fh)) {
 		# ignore new/ and write only to cur/, otherwise MUAs
 		# with R/W access to the Maildir will end up doing
@@ -285,7 +277,7 @@ sub _buf2maildir {
 		$rand = '';
 		do {
 			$final = $dst.$rand.$common.':2,'.$sfx;
-		} while (!($ok = link($tmp, $final)) && $! == EEXIST &&
+		} while (!($ok = link($tmp, $final)) && $!{EEXIST} &&
 			($rand = _rand.','));
 		die "link($tmp, $final): $!" unless $ok;
 		unlink($tmp) or warn "W: failed to unlink $tmp: $!\n";
@@ -473,7 +465,7 @@ sub _pre_augment_mbox {
 	}
 	# Perl does SEEK_END even with O_APPEND :<
 	$self->{seekable} = seek($out, 0, SEEK_SET);
-	if (!$self->{seekable} && $! != ESPIPE && !defined($devfd)) {
+	if (!$self->{seekable} && !$!{ESPIPE} && !defined($devfd)) {
 		die "seek($dst): $!\n";
 	}
 	if (!$self->{seekable}) {
@@ -610,7 +602,7 @@ sub poke_dst {
 
 sub write_mail { # via ->wq_io_do
 	my ($self, $smsg) = @_;
-	git_async_cat($self->{lei}->{ale}->git, $smsg->{blob}, \&git_to_mail,
+	$self->{lei}->{ale}->git->cat_async($smsg->{blob}, \&git_to_mail,
 				[$self->{wcb}, $smsg]);
 }
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/5] lei_tag: fix comments w.r.t support levels
  2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
  2021-04-05 10:27 ` [PATCH 1/5] lei_to_mail: trim down imports Eric Wong
@ 2021-04-05 10:27 ` Eric Wong
  2021-04-05 10:27 ` [PATCH 3/5] lei: maildir: move shard support to MdirReader Eric Wong
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

RFC 8621 registers $flagged, $answered, $seen, $draft which
map to IMAP, Maildir, and mbox Status/X-Status flags.

$forwarded is noted in JMAP, but only Maildir and the
"Lemonade" IMAP profile (RFC 5550) support it.
---
 lib/PublicInbox/LeiTag.pm | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/LeiTag.pm b/lib/PublicInbox/LeiTag.pm
index c7a21c87..1dfc841d 100644
--- a/lib/PublicInbox/LeiTag.pm
+++ b/lib/PublicInbox/LeiTag.pm
@@ -8,11 +8,13 @@ use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 
 # JMAP RFC 8621 4.1.1
-my @KW = (qw(seen answered flagged draft), # system
-	qw(forwarded phishing junk notjunk)); # reserved
+# https://www.iana.org/assignments/imap-jmap-keywords/imap-jmap-keywords.xhtml
+my @KW = (qw(seen answered flagged draft), # widely-compatible
+	qw(forwarded phishing junk notjunk)); # rarely supported
 # note: RFC 8621 states "Users may add arbitrary keywords to an Email",
 # but is it good idea?  Stick to the system and reserved ones, for now.
-# The "system" ones map to Maildir flags and mbox Status/X-Status headers.
+# The widely-compatible ones map to IMAP system flags, Maildir flags
+# and mbox Status/X-Status headers.
 my %KW = map { $_ => 1 } @KW;
 my $L_MAX = 244; # Xapian term limit - length('L')
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/5] lei: maildir: move shard support to MdirReader
  2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
  2021-04-05 10:27 ` [PATCH 1/5] lei_to_mail: trim down imports Eric Wong
  2021-04-05 10:27 ` [PATCH 2/5] lei_tag: fix comments w.r.t support levels Eric Wong
@ 2021-04-05 10:27 ` Eric Wong
  2021-04-05 10:27 ` [PATCH 4/5] lei_to_mail: improve comments and reduce LoC Eric Wong
  2021-04-05 10:27 ` [PATCH 5/5] lei q: fix auth IMAP --output with remote mboxrd Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

We'll eventually want lei_input users like "lei import" and
"lei tag" to support parallel reads.
---
 lib/PublicInbox/InboxWritable.pm |  4 ++--
 lib/PublicInbox/LeiInput.pm      |  2 +-
 lib/PublicInbox/LeiToMail.pm     | 29 +++++++++--------------------
 lib/PublicInbox/MdirReader.pm    | 25 +++++++++++++++++++++----
 t/lei-convert.t                  |  2 +-
 t/lei_to_mail.t                  |  8 ++++----
 6 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index eeebc485..45d8cdc7 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -154,8 +154,8 @@ sub import_maildir {
 	my $im = $self->importer(1);
 	my @self = $self->filter($im) ? ($self) : ();
 	require PublicInbox::MdirReader;
-	PublicInbox::MdirReader::maildir_each_file(\&_each_maildir_fn,
-						$im, @self);
+	PublicInbox::MdirReader->new->maildir_each_file(\&_each_maildir_fn,
+							$im, @self);
 	$im->done;
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 40d71f9e..e416d3ed 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -88,7 +88,7 @@ sub input_path_url {
 		return $lei->fail(<<EOM) if $ifmt && $ifmt ne 'maildir';
 $input appears to a be a maildir, not $ifmt
 EOM
-		PublicInbox::MdirReader::maildir_each_eml($input,
+		PublicInbox::MdirReader->new->maildir_each_eml($input,
 					$self->can('input_maildir_cb'),
 					$self, @args);
 	} else {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 76a11b0e..2e736070 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -14,7 +14,6 @@ use PublicInbox::PktOp qw(pkt_do);
 use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
-use Digest::SHA qw(sha256_hex);
 
 my %kw2char = ( # Maildir characters
 	draft => 'D',
@@ -234,17 +233,9 @@ sub update_kw_maybe ($$$$) {
 	}
 }
 
-sub _augment_or_unlink { # maildir_each_eml cb
-	my ($f, $kw, $eml, $lei, $lse, $mod, $shard, $unlink) = @_;
-	if ($mod) {
-		# can't get dirent.d_ino w/ pure Perl readdir, so we extract
-		# the OID if it looks like one instead of doing stat(2)
-		my $hex = $f =~ m!\b([a-f0-9]{40,})[^/]*\z! ?
-				$1 : sha256_hex($f);
-		my $recno = hex(substr($hex, 0, 8));
-		return if ($recno % $mod) != $shard;
-		update_kw_maybe($lei, $lse, $eml, $kw);
-	}
+sub _md_update { # maildir_each_eml cb
+	my ($f, $kw, $eml, $lei, $lse, $unlink) = @_;
+	update_kw_maybe($lei, $lse, $eml, $kw);
 	$unlink ? unlink($f) : _augment($eml, $lei);
 }
 
@@ -392,21 +383,19 @@ sub _do_augment_maildir {
 	my ($self, $lei) = @_;
 	my $dst = $lei->{ovv}->{dst};
 	my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef;
-	my ($mod, $shard) = @{$self->{shard_info} // []};
+	my $mdr = PublicInbox::MdirReader->new;
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
-			PublicInbox::MdirReader::maildir_each_eml($dst,
-						\&_augment_or_unlink,
-						$lei, $lse, $mod, $shard);
+			$mdr->{shard_info} = $self->{shard_info};
+			$mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse);
 			$dedupe->pause_dedupe;
 		}
 	} elsif ($lse) {
-		PublicInbox::MdirReader::maildir_each_eml($dst,
-					\&_augment_or_unlink,
-					$lei, $lse, $mod, $shard, 1);
+		$mdr->{shard_info} = $self->{shard_info};
+		$mdr->maildir_each_eml($dst, \&_md_update, $lei, $lse, 1);
 	} else {# clobber existing Maildir
-		PublicInbox::MdirReader::maildir_each_file($dst, \&_unlink);
+		$mdr->maildir_each_file($dst, \&_unlink);
 	}
 }
 
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
index 1685e4d8..b49c8ceb 100644
--- a/lib/PublicInbox/MdirReader.pm
+++ b/lib/PublicInbox/MdirReader.pm
@@ -8,6 +8,7 @@ package PublicInbox::MdirReader;
 use strict;
 use v5.10.1;
 use PublicInbox::InboxWritable qw(eml_from_path);
+use Digest::SHA qw(sha256_hex);
 
 # returns Maildir flags from a basename ('' for no flags, undef for invalid)
 sub maildir_basename_flags {
@@ -24,14 +25,25 @@ sub maildir_path_flags {
 	$i >= 0 ? maildir_basename_flags(substr($f, $i + 1)) : undef;
 }
 
-sub maildir_each_file ($$;@) {
-	my ($dir, $cb, @arg) = @_;
+sub shard_ok ($$$) {
+	my ($bn, $mod, $shard) = @_;
+	# can't get dirent.d_ino w/ pure Perl readdir, so we extract
+	# the OID if it looks like one instead of doing stat(2)
+	my $hex = $bn =~ m!\A([a-f0-9]{40,})! ? $1 : sha256_hex($bn);
+	my $recno = hex(substr($hex, 0, 8));
+	($recno % $mod) == $shard;
+}
+
+sub maildir_each_file {
+	my ($self, $dir, $cb, @arg) = @_;
 	$dir .= '/' unless substr($dir, -1) eq '/';
+	my ($mod, $shard) = @{$self->{shard_info} // []};
 	for my $d (qw(new/ cur/)) {
 		my $pfx = $dir.$d;
 		opendir my $dh, $pfx or next;
 		while (defined(my $bn = readdir($dh))) {
 			maildir_basename_flags($bn) // next;
+			next if defined($mod) && !shard_ok($bn, $mod, $shard);
 			$cb->($pfx.$bn, @arg);
 		}
 	}
@@ -40,15 +52,17 @@ sub maildir_each_file ($$;@) {
 my %c2kw = ('D' => 'draft', F => 'flagged', P => 'forwarded',
 	R => 'answered', S => 'seen');
 
-sub maildir_each_eml ($$;@) {
-	my ($dir, $cb, @arg) = @_;
+sub maildir_each_eml {
+	my ($self, $dir, $cb, @arg) = @_;
 	$dir .= '/' unless substr($dir, -1) eq '/';
+	my ($mod, $shard) = @{$self->{shard_info} // []};
 	my $pfx = $dir . 'new/';
 	if (opendir(my $dh, $pfx)) {
 		while (defined(my $bn = readdir($dh))) {
 			next if substr($bn, 0, 1) eq '.';
 			my @f = split(/:/, $bn, -1);
 			next if scalar(@f) != 1;
+			next if defined($mod) && !shard_ok($bn, $mod, $shard);
 			my $f = $pfx.$bn;
 			my $eml = eml_from_path($f) or next;
 			$cb->($f, [], $eml, @arg);
@@ -59,6 +73,7 @@ sub maildir_each_eml ($$;@) {
 	while (defined(my $bn = readdir($dh))) {
 		my $fl = maildir_basename_flags($bn) // next;
 		next if index($fl, 'T') >= 0;
+		next if defined($mod) && !shard_ok($bn, $mod, $shard);
 		my $f = $pfx.$bn;
 		my $eml = eml_from_path($f) or next;
 		my @kw = sort(map { $c2kw{$_} // () } split(//, $fl));
@@ -66,4 +81,6 @@ sub maildir_each_eml ($$;@) {
 	}
 }
 
+sub new { bless {}, __PACKAGE__ }
+
 1;
diff --git a/t/lei-convert.t b/t/lei-convert.t
index dc53b82c..0ea860c8 100644
--- a/t/lei-convert.t
+++ b/t/lei-convert.t
@@ -57,7 +57,7 @@ test_lei({ tmpdir => $tmpdir }, sub {
 	lei_ok('convert', '-o', "$d/md", "mboxrd:$d/foo.mboxrd");
 	ok(-d "$d/md", 'Maildir created');
 	my @md;
-	PublicInbox::MdirReader::maildir_each_eml("$d/md", sub {
+	PublicInbox::MdirReader->new->maildir_each_eml("$d/md", sub {
 		push @md, $_[2];
 	});
 	is(scalar(@md), scalar(@mboxrd), 'got expected emails in Maildir') or
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index 75314add..51357257 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -253,7 +253,7 @@ SKIP: { # FIFO support
 }
 
 { # Maildir support
-	my $each_file = PublicInbox::MdirReader->can('maildir_each_file');
+	my $mdr = PublicInbox::MdirReader->new;
 	my $md = "$tmpdir/maildir/";
 	my $wcb = $wcb_get->('maildir', $md);
 	is(ref($wcb), 'CODE', 'got Maildir callback');
@@ -261,7 +261,7 @@ SKIP: { # FIFO support
 	$wcb->(\(my $x = $buf), $b4dc0ffee);
 
 	my @f;
-	$each_file->($md, sub { push @f, shift });
+	$mdr->maildir_each_file($md, sub { push @f, shift });
 	open my $fh, $f[0] or BAIL_OUT $!;
 	is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
 
@@ -270,7 +270,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\nx\n"), $deadcafe);
 
 	my @x = ();
-	$each_file->($md, sub { push @x, shift });
+	$mdr->maildir_each_file($md, sub { push @x, shift });
 	is(scalar(@x), 1, 'wrote one new file');
 	ok(!-f $f[0], 'old file clobbered');
 	open $fh, $x[0] or BAIL_OUT $!;
@@ -281,7 +281,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\ny\n"), $deadcafe);
 	$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
 	@f = ();
-	$each_file->($md, sub { push @f, shift });
+	$mdr->maildir_each_file($md, sub { push @f, shift });
 	is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
 	my @new = grep(!/\A\Q$x[0]\E\z/, @f);
 	is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/5] lei_to_mail: improve comments and reduce LoC
  2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
                   ` (2 preceding siblings ...)
  2021-04-05 10:27 ` [PATCH 3/5] lei: maildir: move shard support to MdirReader Eric Wong
@ 2021-04-05 10:27 ` Eric Wong
  2021-04-05 10:27 ` [PATCH 5/5] lei q: fix auth IMAP --output with remote mboxrd Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

We don't need to waste LoC on corner cases, single-use internal
subs, or restoring SIG{__WARN__} when a process exits.  All that
extra code contributes to memory use and startup time, especially
for users who can't use FD passing.
---
 lib/PublicInbox/LeiToMail.pm | 42 +++++++++++++++---------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 2e736070..9411313b 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -138,18 +138,11 @@ sub eml2mboxcl2 {
 
 sub git_to_mail { # git->cat_async callback
 	my ($bref, $oid, $type, $size, $arg) = @_;
-	if ($type ne 'blob') {
-		if ($type eq 'missing') {
-			warn "missing $oid\n";
-		} else {
-			warn "unexpected type=$type for $oid\n";
-		}
-	}
+	return warn("W: $oid is $type (!= blob)\n") if $type ne 'blob';
+	return warn("E: $oid is empty\n") unless $size;
 	my ($write_cb, $smsg) = @$arg;
-	if ($smsg->{blob} ne $oid) {
-		die "BUG: expected=$smsg->{blob} got=$oid";
-	}
-	$write_cb->($bref, $smsg) if $size > 0;
+	die "BUG: expected=$smsg->{blob} got=$oid" if $smsg->{blob} ne $oid;
+	$write_cb->($bref, $smsg);
 }
 
 sub reap_compress { # dwaitpid callback
@@ -159,7 +152,7 @@ sub reap_compress { # dwaitpid callback
 	$lei->fail("@$cmd failed", $? >> 8);
 }
 
-sub _post_augment_mbox { # open a compressor process
+sub _post_augment_mbox { # open a compressor process from top-level process
 	my ($self, $lei) = @_;
 	my $zsfx = $self->{zsfx} or return;
 	my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei);
@@ -173,12 +166,6 @@ sub _post_augment_mbox { # open a compressor process
 	$lei->{1} = $pp;
 }
 
-sub dup_src ($) {
-	my ($in) = @_;
-	open my $dup, '+>>&', $in or die "dup: $!";
-	$dup;
-}
-
 # --augment existing output destination, with deduplication
 sub _augment { # MboxReader eml_cb
 	my ($eml, $lei) = @_;
@@ -309,7 +296,7 @@ sub _imap_write_cb ($$) {
 	my $lse = $lei->{lse}; # may be undef
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
-		$mic // return $lei->fail; # dst may be undef-ed in last run
+		$mic // return $lei->fail; # mic may be undef-ed in last run
 		if ($dedupe) {
 			$eml //= PublicInbox::Eml->new($$bref); # copy bref
 			return if $dedupe->is_dup($eml, $smsg->{blob});
@@ -488,9 +475,12 @@ sub _do_augment_mbox {
 		truncate($out, 0) or die "truncate($dst): $!";
 		return;
 	}
-	my $zsfx = $self->{zsfx};
-	my $rd = $zsfx ? PublicInbox::MboxReader::zsfxcat($out, $zsfx, $lei)
-			: dup_src($out);
+	my $rd;
+	if (my $zsfx = $self->{zsfx}) {
+		$rd = PublicInbox::MboxReader::zsfxcat($out, $zsfx, $lei);
+	} else {
+		open($rd, '+>>&', $out) or die "dup: $!";
+	}
 	my $dedupe;
 	if ($opt->{augment}) {
 		$dedupe = $lei->{dedupe};
@@ -535,6 +525,7 @@ sub post_augment {
 	$m->($self, $lei, @args);
 }
 
+# called by every single l2m worker process
 sub do_post_auth {
 	my ($self) = @_;
 	my $lei = $self->{lei};
@@ -542,7 +533,7 @@ sub do_post_auth {
 	pkt_do($lei->{pkt_op_p}, 'incr_start_query') or
 		die "incr_start_query: $!";
 	my $aug;
-	if (lock_free($self)) {
+	if (lock_free($self)) { # all workers do_augment
 		my $mod = $self->{-wq_nr_workers};
 		my $shard = $self->{-wq_worker_nr};
 		if (my $net = $lei->{net}) {
@@ -551,7 +542,7 @@ sub do_post_auth {
 			$self->{shard_info} = [ $mod, $shard ];
 		}
 		$aug = '+'; # incr_post_augment
-	} elsif ($self->{-wq_worker_nr} == 0) {
+	} elsif ($self->{-wq_worker_nr} == 0) { # 1st worker do_augment
 		$aug = '.'; # do_post_augment
 	}
 	if ($aug) {
@@ -561,6 +552,7 @@ sub do_post_auth {
 		pkt_do($lei->{pkt_op_p}, $aug) == 1 or
 				die "do_post_augment trigger: $!";
 	}
+	# done augmenting, connect the compressor pipe for each worker
 	if (my $zpipe = delete $lei->{zpipe}) {
 		$lei->{1} = $zpipe->[1];
 		close $zpipe->[0];
@@ -581,6 +573,7 @@ sub lock_free {
 	$_[0]->{base_type} =~ /\A(?:maildir|imap|jmap)\z/ ? 1 : 0;
 }
 
+# wakes up the MUA when complete so it can refresh messages list
 sub poke_dst {
 	my ($self) = @_;
 	if ($self->{base_type} eq 'maildir') {
@@ -599,7 +592,6 @@ sub wq_atexit_child {
 	my ($self) = @_;
 	delete $self->{wcb};
 	$self->{lei}->{ale}->git->async_wait_all;
-	$SIG{__WARN__} = 'DEFAULT';
 }
 
 # called in top-level lei-daemon when LeiAuth is done

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/5] lei q: fix auth IMAP --output with remote mboxrd
  2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
                   ` (3 preceding siblings ...)
  2021-04-05 10:27 ` [PATCH 4/5] lei_to_mail: improve comments and reduce LoC Eric Wong
@ 2021-04-05 10:27 ` Eric Wong
  4 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2021-04-05 10:27 UTC (permalink / raw)
  To: meta

IMAP authentication info is only shared amongst lei2mail workers,
so we must ensure all IMAP writes go through lei2mail workers
even if we don't have to access the mail through git.

This allows us to decouple the latency of the remote mboxrd from
the latency of the IMAP --output at the expense of extra IPC
overhead within our own processes.
---
 lib/PublicInbox/LeiOverview.pm | 14 ++++----------
 lib/PublicInbox/LeiToMail.pm   |  3 ++-
 lib/PublicInbox/LeiXSearch.pm  |  4 ++--
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index cdd9ee04..bfb8b143 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -195,7 +195,7 @@ sub _json_pretty {
 }
 
 sub ovv_each_smsg_cb { # runs in wq worker usually
-	my ($self, $lei, $ibxish) = @_;
+	my ($self, $lei) = @_;
 	my ($json, $dedupe);
 	if (my $pkg = $self->{json}) {
 		$json = $pkg->new;
@@ -208,17 +208,11 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$dedupe->prepare_dedupe;
 	}
 	$lei->{ovv_buf} = \(my $buf = '') if !$l2m;
-	if ($l2m && !$ibxish) { # remote https?:// mboxrd
-		my $wcb = $l2m->write_cb($lei);
-		sub {
-			my ($smsg, undef, $eml) = @_; # no mitem in $_[1]
-			$wcb->(undef, $smsg, $eml);
-		};
-	} elsif ($l2m && $l2m->{-wq_s1}) {
+	if ($l2m) {
 		sub {
-			my ($smsg, $mitem) = @_;
+			my ($smsg, $mitem, $eml) = @_;
 			$smsg->{pct} = get_pct($mitem) if $mitem;
-			$l2m->wq_io_do('write_mail', [], $smsg);
+			$l2m->wq_io_do('write_mail', [], $smsg, $eml);
 		}
 	} elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
 		my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 9411313b..70164e40 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -583,7 +583,8 @@ sub poke_dst {
 }
 
 sub write_mail { # via ->wq_io_do
-	my ($self, $smsg) = @_;
+	my ($self, $smsg, $eml) = @_;
+	return $self->{wcb}->(undef, $smsg, $eml) if $eml;
 	$self->{lei}->{ale}->git->cat_async($smsg->{blob}, \&git_to_mail,
 				[$self->{wcb}, $smsg]);
 }
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2b23e8e9..692d5e54 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -153,7 +153,7 @@ sub query_thread_mset { # for --threads
 	return warn("$desc not indexed by Xapian\n") unless ($srch && $over);
 	my $mo = { %{$lei->{mset_opt}} };
 	my $mset;
-	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish);
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
 	my $can_kw = !!$ibxish->can('msg_keywords');
 	my $fl = $lei->{opt}->{threads} > 1 ? 1 : undef;
 	do {
@@ -196,7 +196,7 @@ sub query_mset { # non-parallel for non-"--threads" users
 	for my $loc (locals($self)) {
 		attach_external($self, $loc);
 	}
-	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $self);
+	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
 	do {
 		$mset = $self->mset($mo->{qstr}, $mo);
 		mset_progress($lei, 'xsearch', $mset->size,

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-04-05 10:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-05 10:27 [PATCH 0/5] lei_to_mail fixes Eric Wong
2021-04-05 10:27 ` [PATCH 1/5] lei_to_mail: trim down imports Eric Wong
2021-04-05 10:27 ` [PATCH 2/5] lei_tag: fix comments w.r.t support levels Eric Wong
2021-04-05 10:27 ` [PATCH 3/5] lei: maildir: move shard support to MdirReader Eric Wong
2021-04-05 10:27 ` [PATCH 4/5] lei_to_mail: improve comments and reduce LoC Eric Wong
2021-04-05 10:27 ` [PATCH 5/5] lei q: fix auth IMAP --output with remote mboxrd Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).