unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] lei: propagate keyword changes from lei/store
Date: Thu,  2 Sep 2021 10:17:58 +0000	[thread overview]
Message-ID: <20210902101758.18715-4-e@80x24.org> (raw)
In-Reply-To: <20210902101758.18715-1-e@80x24.org>

This works with existing inotify/EVFILT_VNODE functionality to
propagate changes made from one Maildir to another Maildir.

I chose the lei/store worker process to handle this since
propagating changes back into lei-daemon on a massive scale
could lead to dead-locking while both processes are attempting
to write to each other.  Eliminating IPC overhead is a nice
side effect, but could hurt performance if Maildirs are slow.

The code for "lei export-kw" is significantly revamped to match
the new code used in the "lei/store" daemon.  It should be more
correct w.r.t. corner-cases and stale entries, but perhaps
better tests need to be written.
---
 MANIFEST                        |   1 +
 lib/PublicInbox/LeiExportKw.pm  |  24 ++++----
 lib/PublicInbox/LeiNoteEvent.pm |  14 ++---
 lib/PublicInbox/LeiStore.pm     | 105 ++++++++++++++++++++++++++++++--
 t/lei-auto-watch.t              |  45 ++++++++++++++
 5 files changed, 165 insertions(+), 24 deletions(-)
 create mode 100644 t/lei-auto-watch.t

diff --git a/MANIFEST b/MANIFEST
index cf7268ed..be6ec927 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -423,6 +423,7 @@ t/init.t
 t/ipc.t
 t/iso-2202-jp.eml
 t/kqnotify.t
+t/lei-auto-watch.t
 t/lei-convert.t
 t/lei-daemon.t
 t/lei-export-kw.t
diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm
index 42a5ff22..78c6c6f9 100644
--- a/lib/PublicInbox/LeiExportKw.pm
+++ b/lib/PublicInbox/LeiExportKw.pm
@@ -25,12 +25,11 @@ sub export_kw_md { # LeiMailSync->each_src callback
 	}
 	$bn .= ':2,'.
 		PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown);
+	return if $bn eq $$id;
 	my $dst = "$mdir/cur/$bn";
-	my @fail;
 	my $lei = $self->{lei};
 	for my $d (@try) {
 		my $src = "$mdir/$d/$$id";
-		next if $src eq $dst;
 
 		# we use link(2) + unlink(2) since rename(2) may
 		# inadvertently clobber if the "uniquefilename" part wasn't
@@ -44,20 +43,19 @@ sub export_kw_md { # LeiMailSync->each_src callback
 			$lei->{sto}->ipc_do('lms_mv_src', "maildir:$mdir",
 						$oidbin, $id, $bn);
 			return; # success anyways if link(2) worked
-		}
-		if ($! == ENOENT && !-e $src) { # some other process moved it
-			$lei->{sto}->ipc_do('lms_clear_src',
-						"maildir:$mdir", $id);
-			next;
-		}
-		push @fail, $src if $! != EEXIST;
+		} elsif ($! == EEXIST) { # lost race with lei/store?
+			return;
+		} elsif ($! != ENOENT) {
+			$lei->child_error(1, "E: link($src -> $dst): $!");
+		} # else loop @try
 	}
-	return unless @fail;
-	# both tries failed
 	my $e = $!;
-	my $orig = '['.join('|', @fail).']';
+	# both tries failed
 	my $oidhex = unpack('H*', $oidbin);
-	$lei->child_error(1, "link($orig, $dst) ($oidhex): $e");
+	my $src = "$mdir/{".join(',', @try)."}/$$id";
+	$lei->child_error(1, "link($src -> $dst) ($oidhex): $e");
+	for (@try) { return if -e "$mdir/$_/$$id" }
+	$lei->{sto}->ipc_do('lms_clear_src', "maildir:$mdir", $id);
 }
 
 sub export_kw_imap { # LeiMailSync->each_src callback
diff --git a/lib/PublicInbox/LeiNoteEvent.pm b/lib/PublicInbox/LeiNoteEvent.pm
index 6a40ba39..41415346 100644
--- a/lib/PublicInbox/LeiNoteEvent.pm
+++ b/lib/PublicInbox/LeiNoteEvent.pm
@@ -36,32 +36,31 @@ sub note_event_arm_done ($) {
 }
 
 sub eml_event ($$$$) {
-	my ($self, $eml, $kw, $state) = @_;
+	my ($self, $eml, $vmd, $state) = @_;
 	my $sto = $self->{lei}->{sto};
 	my $lse = $self->{lse} //= $sto->search;
-	my $vmd = { kw => $kw };
 	if ($state =~ /\Aimport-(?:rw|ro)\z/) {
 		$sto->ipc_do('set_eml', $eml, $vmd);
 	} elsif ($state =~ /\Aindex-(?:rw|ro)\z/) {
 		my $xoids = $self->{lei}->ale->xoids_for($eml);
 		$sto->ipc_do('index_eml_only', $eml, $vmd, $xoids);
 	} elsif ($state =~ /\Atag-(?:rw|ro)\z/) {
-		my $c = $lse->kw_changed($eml, $kw, my $docids = []);
+		my $c = $lse->kw_changed($eml, $vmd->{kw}, my $docids = []);
 		if (scalar @$docids) { # already in lei/store
 			$sto->ipc_do('set_eml_vmd', undef, $vmd, $docids) if $c;
 		} elsif (my $xoids = $self->{lei}->ale->xoids_for($eml)) {
 			# it's in an external, only set kw, here
 			$sto->ipc_do('set_xvmd', $xoids, $eml, $vmd);
-		} # else { totally unknown
+		} # else { totally unknown: ignore
 	} else {
 		warn "unknown state: $state (in $self->{lei}->{cfg}->{'-f'})\n";
 	}
 }
 
 sub maildir_event { # via wq_io_do
-	my ($self, $fn, $kw, $state) = @_;
+	my ($self, $fn, $vmd, $state) = @_;
 	my $eml = PublicInbox::InboxWritable::eml_from_path($fn) // return;
-	eml_event($self, $eml, $kw, $state);
+	eml_event($self, $eml, $vmd, $state);
 }
 
 sub lei_note_event {
@@ -98,7 +97,8 @@ sub lei_note_event {
 			// return;
 		return if index($fl, 'T') >= 0;
 		my $kw = PublicInbox::MdirReader::flags2kw($fl);
-		$self->wq_io_do('maildir_event', [], $fn, $kw, $state);
+		my $vmd = { kw => $kw, sync_info => [ $folder, \$bn ] };
+		$self->wq_io_do('maildir_event', [], $fn, $vmd, $state);
 	} # else: TODO: imap
 }
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 0fa2d3c0..a91b30f7 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -25,10 +25,14 @@ use PublicInbox::MID qw(mids);
 use PublicInbox::LeiSearch;
 use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
+use PublicInbox::MdirReader;
+use PublicInbox::LeiToMail;
 use List::Util qw(max);
 use File::Temp ();
 use POSIX ();
 use IO::Handle (); # ->autoflush
+use Sys::Syslog qw(syslog openlog);
+use Errno qw(EEXIST ENOENT);
 
 sub new {
 	my (undef, $dir, $opt) = @_;
@@ -165,12 +169,92 @@ sub _docids_for ($$) {
 	sort { $a <=> $b } values %docids;
 }
 
+# n.b. similar to LeiExportKw->export_kw_md, but this is for a single eml
+sub export1_kw_md ($$$$$) {
+	my ($self, $mdir, $bn, $oidbin, $vmdish) = @_; # vmd/vmd_mod
+	my $orig = $bn;
+	my (@try, $unkn, $kw);
+	if ($bn =~ s/:2,([a-zA-Z]*)\z//) {
+		($kw, $unkn) = PublicInbox::MdirReader::flags2kw($1);
+		if (my $set = $vmdish->{kw}) {
+			$kw = $set;
+		} elsif (my $add = $vmdish->{'+kw'}) {
+			@$kw{@$add} = ();
+		} elsif (my $del = $vmdish->{-kw}) {
+			delete @$kw{@$del};
+		} # else no changes...
+		@try = qw(cur new);
+	} else { # no keywords, yet, could be in new/
+		@try = qw(new cur);
+		$unkn = [];
+		if (my $set = $vmdish->{kw}) {
+			$kw = $set;
+		} elsif (my $add = $vmdish->{'+kw'}) {
+			@$kw{@$add} = (); # auto-vivify
+		} else { # ignore $vmdish->{-kw}
+			$kw = [];
+		}
+	}
+	$kw = [ keys %$kw ] if ref($kw) eq 'HASH';
+	$bn .= ':2,'. PublicInbox::LeiToMail::kw2suffix($kw, @$unkn);
+	return if $orig eq $bn; # no change
+
+	# we use link(2) + unlink(2) since rename(2) may
+	# inadvertently clobber if the "uniquefilename" part wasn't
+	# actually unique.
+	my $dst = "$mdir/cur/$bn";
+	for my $d (@try) {
+		my $src = "$mdir/$d/$orig";
+		if (link($src, $dst)) {
+			if (!unlink($src) and $! != ENOENT) {
+				syslog('warning', "unlink($src): $!");
+			}
+			# TODO: verify oidbin?
+			lms_mv_src($self, "maildir:$mdir",
+					$oidbin, \$orig, $bn);
+			return;
+		} elsif ($! == EEXIST) { # lost race with "lei export-kw"?
+			return;
+		} elsif ($! == ENOENT) {
+			syslog('warning', "link($src -> $dst): $!")
+		} # else loop @try
+	}
+	my $e = $!;
+	my $src = "$mdir/{".join(',', @try)."}/$orig";
+	my $oidhex = unpack('H*', $oidbin);
+	syslog('warning', "link($src -> $dst) ($oidhex): $e");
+	for (@try) { return if -e "$mdir/$_/$orig" };
+	lms_clear_src($self, "maildir:$mdir", \$orig);
+}
+
+sub sto_export_kw ($$$) {
+	my ($self, $docid, $vmdish) = @_; # vmdish (vmd or vmd_mod)
+	my ($eidx, $tl) = eidx_init($self);
+	my $lms = _lms_rw($self) // return;
+	my $xr3 = $eidx->{oidx}->get_xref3($docid, 1);
+	for my $row (@$xr3) {
+		my (undef, undef, $oidbin) = @$row;
+		my $locs = $lms->locations_for($oidbin) // next;
+		while (my ($loc, $ids) = each %$locs) {
+			if ($loc =~ s!\Amaildir:!!i) {
+				for my $id (@$ids) {
+					export1_kw_md($self, $loc, $id,
+							$oidbin, $vmdish);
+				}
+			}
+			# TODO: IMAP
+		}
+	}
+}
+
+# vmd = { kw => [ qw(seen ...) ], L => [ qw(inbox ...) ] }
 sub set_eml_vmd {
 	my ($self, $eml, $vmd, $docids) = @_;
 	my ($eidx, $tl) = eidx_init($self);
 	$docids //= [ _docids_for($self, $eml) ];
 	for my $docid (@$docids) {
 		$eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd);
+		sto_export_kw($self, $docid, $vmd);
 	}
 	$docids;
 }
@@ -284,6 +368,12 @@ EOF
 	$docid;
 }
 
+sub _add_vmd ($$$$) {
+	my ($self, $idx, $docid, $vmd) = @_;
+	$idx->ipc_do('add_vmd', $docid, $vmd);
+	sto_export_kw($self, $docid, $vmd);
+}
+
 sub add_eml {
 	my ($self, $eml, $vmd, $xoids) = @_;
 	my $im = $self->{-fake_im} // $self->importer; # may create new epoch
@@ -310,7 +400,7 @@ sub add_eml {
 			@$vivify_xvmd = sort { $a <=> $b } keys(%docids);
 		}
 	}
-	if (@$vivify_xvmd) {
+	if (@$vivify_xvmd) { # docids list
 		$xoids //= {};
 		$xoids->{$smsg->{blob}} = 1;
 		for my $docid (@$vivify_xvmd) {
@@ -327,7 +417,7 @@ sub add_eml {
 			for my $oid (keys %$xoids) {
 				$oidx->add_xref3($docid, -1, $oid, '.');
 			}
-			$idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
+			_add_vmd($self, $idx, $docid, $vmd) if $vmd;
 		}
 		$vivify_xvmd;
 	} elsif (my @docids = _docids_for($self, $eml)) {
@@ -337,7 +427,7 @@ sub add_eml {
 			$oidx->add_xref3($docid, -1, $smsg->{blob}, '.');
 			# add_eidx_info for List-Id
 			$idx->ipc_do('add_eidx_info', $docid, '.', $eml);
-			$idx->ipc_do('add_vmd', $docid, $vmd) if $vmd;
+			_add_vmd($self, $idx, $docid, $vmd) if $vmd;
 		}
 		\@docids;
 	} else { # totally new message
@@ -347,7 +437,7 @@ sub add_eml {
 		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
 		my $idx = $eidx->idx_shard($smsg->{num});
 		$idx->index_eml($eml, $smsg);
-		$idx->ipc_do('add_vmd', $smsg->{num}, $vmd) if $vmd;
+		_add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd;
 		$smsg;
 	}
 }
@@ -365,6 +455,7 @@ sub index_eml_only {
 	set_eml($self, $eml, $vmd, $xoids);
 }
 
+# store {kw} / {L} info for a message which is only in an external
 sub _external_only ($$$) {
 	my ($self, $xoids, $eml) = @_;
 	my $eidx = $self->{priv_eidx};
@@ -398,6 +489,7 @@ sub update_xvmd {
 		next if $seen{$docid}++;
 		my $idx = $eidx->idx_shard($docid);
 		$idx->ipc_do('update_vmd', $docid, $vmd_mod);
+		sto_export_kw($self, $docid, $vmd_mod);
 	}
 	return unless scalar(keys(%$xoids));
 
@@ -410,12 +502,14 @@ sub update_xvmd {
 			}
 			my $idx = $eidx->idx_shard($docid);
 			$idx->ipc_do('update_vmd', $docid, $vmd_mod);
+			sto_export_kw($self, $docid, $vmd_mod);
 		}
 		return;
 	}
 	# totally unseen
 	my ($smsg, $idx) = _external_only($self, $xoids, $eml);
 	$idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod);
+	sto_export_kw($self, $smsg->{num}, $vmd_mod);
 }
 
 # set or update keywords for external message, called via ipc_do
@@ -433,6 +527,7 @@ sub set_xvmd {
 		next if $seen{$docid}++;
 		my $idx = $eidx->idx_shard($docid);
 		$idx->ipc_do('set_vmd', $docid, $vmd);
+		sto_export_kw($self, $docid, $vmd);
 	}
 	return unless scalar(keys(%$xoids));
 
@@ -443,6 +538,7 @@ sub set_xvmd {
 	# totally unseen:
 	my ($smsg, $idx) = _external_only($self, $xoids, $eml);
 	$idx->ipc_do('add_vmd', $smsg->{num}, $vmd);
+	sto_export_kw($self, $smsg->{num}, $vmd);
 }
 
 sub checkpoint {
@@ -497,6 +593,7 @@ sub ipc_atfork_child {
 	if (my $to_close = delete($self->{to_close})) {
 		close($_) for @$to_close;
 	}
+	openlog('lei/store', 'pid,nowait,nofatal,ndelay', 'user');
 	$self->SUPER::ipc_atfork_child;
 }
 
diff --git a/t/lei-auto-watch.t b/t/lei-auto-watch.t
new file mode 100644
index 00000000..321c0ab3
--- /dev/null
+++ b/t/lei-auto-watch.t
@@ -0,0 +1,45 @@
+#!perl -w
+# Copyright all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use File::Basename qw(basename);
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my $have_fast_inotify = eval { require Linux::Inotify2 } ||
+	eval { require IO::KQueue };
+
+$have_fast_inotify or
+	diag("$0 IO::KQueue or Linux::Inotify2 missing, test will be slow");
+
+test_lei(sub {
+	my $x = "$ENV{HOME}/x";
+	my $y = "$ENV{HOME}/y";
+	lei_ok qw(add-external), "$ro_home/t1";
+	lei_ok qw(q mid:testmessage@example.com -o), $x;
+	lei_ok qw(q mid:testmessage@example.com -o), $y;
+	my @x = glob("$x/cur/*");
+	my @y = glob("$y/cur/*");
+	scalar(@x) == 1 or xbail 'expected 1 file', \@x;
+	scalar(@y) == 1 or xbail 'expected 1 file', \@y;
+
+	my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+	lei_ok qw(inspect), "blob:$oid";
+	my $ins = json_utf8->decode($lei_out);
+	my $exp = { "maildir:$x" => [ map { basename($_) } @x ],
+		"maildir:$y" => [ map { basename($_) } @y ] };
+	is_deeply($ins->{'mail-sync'}, $exp, 'inspect as expected');
+	lei_ok qw(add-watch), $x;
+	my $dst = $x[0] . 'S';
+	rename($x[0], $dst) or xbail "rename($x[0], $dst): $!";
+	tick($have_fast_inotify ? undef : 2.1); # wait for inotify
+	my @y2 = glob("$y/*/*");
+	is_deeply(\@y2, [ "$y[0]S" ], "`seen' kw propagated to `y' dir");
+	lei_ok qw(note-event done);
+	tick; # XXX why is this needed?
+	lei_ok qw(inspect), "blob:$oid";
+	$ins = json_utf8->decode($lei_out);
+	$exp = { "maildir:$x" => [ map { basename($_) } glob("$x/*/*") ],
+		"maildir:$y" => [ map { basename($_) } glob("$y/*/*") ] };
+	is_deeply($ins->{'mail-sync'}, $exp, 'mail_sync matches FS');
+});
+
+done_testing;

  parent reply	other threads:[~2021-09-02 10:17 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-02 10:17 [PATCH 0/3] lei: auto keyword propagation to Maildirs Eric Wong
2021-09-02 10:17 ` [PATCH 1/3] lei_mail_sync: do not use transactions Eric Wong
2021-09-02 10:17 ` [PATCH 2/3] lei_input: set and prepare watches early Eric Wong
2021-09-02 10:17 ` Eric Wong [this message]
2021-09-02 10:25   ` [SQUASH 4/3] t/lei-auto-watch: workaround for FreeBSD kevent Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210902101758.18715-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).