unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/3] lei refresh-mail-sync: replace prune-mail-sync
Date: Thu, 16 Sep 2021 09:41:15 +0000	[thread overview]
Message-ID: <20210916094116.11457-3-e@80x24.org> (raw)
In-Reply-To: <20210916094116.11457-1-e@80x24.org>

Merely pruning mail synchronization information was
insufficient for Maildir: renames are common in Maildir,
and we need to detect them after the fact when they
happen while lei-daemon isn't running.

Running this command could make "lei index" far more
useful...
---
 MANIFEST                                      |  2 +
 lib/PublicInbox/LEI.pm                        |  3 +-
 ...PruneMailSync.pm => LeiRefreshMailSync.pm} | 36 +++++++---
 lib/PublicInbox/LeiStore.pm                   |  5 ++
 t/lei-export-kw.t                             |  1 -
 t/lei-refresh-mail-sync.t                     | 67 +++++++++++++++++++
 6 files changed, 103 insertions(+), 11 deletions(-)
 rename lib/PublicInbox/{LeiPruneMailSync.pm => LeiRefreshMailSync.pm} (70%)
 create mode 100644 t/lei-refresh-mail-sync.t

diff --git a/MANIFEST b/MANIFEST
index 640eabd1..221cb992 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -238,6 +238,7 @@ lib/PublicInbox/LeiPmdir.pm
 lib/PublicInbox/LeiPruneMailSync.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
+lib/PublicInbox/LeiRefreshMailSync.pm
 lib/PublicInbox/LeiRemote.pm
 lib/PublicInbox/LeiRm.pm
 lib/PublicInbox/LeiRmWatch.pm
@@ -450,6 +451,7 @@ t/lei-q-kw.t
 t/lei-q-remote-import.t
 t/lei-q-save.t
 t/lei-q-thread.t
+t/lei-refresh-mail-sync.t
 t/lei-sigpipe.t
 t/lei-tag.t
 t/lei-up.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ec103231..9794497b 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -263,7 +263,7 @@ our %CMD = ( # sorted in order of importance/use:
 	@net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
-'prune-mail-sync' => [ 'LOCATION...|--all',
+'refresh-mail-sync' => [ 'LOCATION...|--all',
 	'prune dangling sync data for a mail folder', 'all:s', @c_opt ],
 'export-kw' => [ 'LOCATION...|--all',
 	'one-time export of keywords of sync sources',
@@ -616,6 +616,7 @@ sub pkt_ops {
 	$ops->{x_it} = [ \&x_it, $lei ];
 	$ops->{child_error} = [ \&child_error, $lei ];
 	$ops->{incr} = [ \&incr, $lei ];
+	$ops->{sto_done_request} = [ \&sto_done_request, $lei, $lei->{sock} ];
 	$ops;
 }
 
diff --git a/lib/PublicInbox/LeiPruneMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
similarity index 70%
rename from lib/PublicInbox/LeiPruneMailSync.pm
rename to lib/PublicInbox/LeiRefreshMailSync.pm
index 3678bd04..07b0aa52 100644
--- a/lib/PublicInbox/LeiPruneMailSync.pm
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -1,16 +1,20 @@
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# "lei prune-mail-sync" drops dangling sync information
-package PublicInbox::LeiPruneMailSync;
+# "lei refresh-mail-sync" drops dangling sync information
+# and attempts to detect moved files
+package PublicInbox::LeiRefreshMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use PublicInbox::LeiExportKw;
 use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::Import;
 
 sub eml_match ($$) {
 	my ($eml, $oidbin) = @_;
+	$eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
 	$oidbin eq git_sha(length($oidbin) == 20 ? 1 : 256, $eml)->digest;
 }
 
@@ -20,7 +24,7 @@ sub prune_mdir { # lms->each_src callback
 	for my $d (@try) {
 		my $src = "$mdir/$d/$$id";
 		if ($self->{verify}) {
-			my $eml = eml_from_path($src) or next;
+			my $eml = eml_from_path($src) // next;
 			return if eml_match($eml, $oidbin);
 		} elsif (-f $src) {
 			return;
@@ -38,12 +42,27 @@ sub prune_imap { # lms->each_src callback
 	$self->{lei}->{sto}->ipc_do('lms_clear_src', $url, $uid);
 }
 
+# detects missed file moves
+sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
+	my ($self, $f, $fl) = @_;
+	my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+		die "BUG: $f was not from a Maildir?";
+	substr($folder, 0, 0) = 'maildir:'; # add prefix
+	my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
+	return if defined($lms->name_oidbin($folder, $bn));
+	my $eml = eml_from_path($f) // return;
+	my $oidbin = $self->{lei}->git_oid($eml)->digest;
+	$self->{lei}->{sto}->ipc_do('lms_set_src', $oidbin, $folder, \$bn);
+}
+
 sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
 	my ($self, $input, @args) = @_;
 	my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
 	if ($input =~ /\Amaildir:(.+)/i) {
-		my $mdir = $1;
-		$lms->each_src($input, \&prune_mdir, $self, $mdir);
+		$lms->each_src($input, \&prune_mdir, $self, my $mdir = $1);
+		$self->{lse} //= $self->{lei}->{sto}->search;
+		# call pmdir_cb (via maildir_each_file -> each_mdir_fn)
+		PublicInbox::LeiInput::input_path_url($self, $input);
 	} elsif ($input =~ m!\Aimaps?://!i) {
 		my $uri = PublicInbox::URIimap->new($input);
 		my $mic = $self->{lei}->{net}->mic_for_folder($uri);
@@ -51,10 +70,10 @@ sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
 		$uids = +{ map { $_ => undef } @$uids };
 		$lms->each_src($$uri, \&prune_imap, $self, $uids, $$uri);
 	} else { die "BUG: $input not supported" }
-	my $wait = $self->{lei}->{sto}->ipc_do('done');
+	$self->{lei}->{pkt_op_p}->pkt_do('sto_done_request');
 }
 
-sub lei_prune_mail_sync {
+sub lei_refresh_mail_sync {
 	my ($lei, @folders) = @_;
 	my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
 lei/store uninitialized, see lei-import(1)
@@ -78,7 +97,6 @@ EOM
 	$self->prepare_inputs($lei, \@folders) or return;
 	my $j = $lei->{opt}->{jobs} || scalar(@{$self->{inputs}}) || 1;
 	my $ops = {};
-	$sto->write_prepare($lei);
 	$lei->{auth}->op_merge($ops, $self) if $lei->{auth};
 	$self->{-wq_nr_workers} = $j // 1; # locked
 	(my $op_c, $ops) = $lei->workers_start($self, $j, $ops);
@@ -89,7 +107,7 @@ EOM
 }
 
 no warnings 'once';
-*_complete_prune_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
+*_complete_refresh_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
 *ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
 *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index e8bcb04e..32f55abd 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -293,6 +293,11 @@ sub set_sync_info {
 	_lms_rw($self)->set_src(pack('H*', $oidhex), $folder, $id);
 }
 
+sub lms_set_src {
+	my ($self, $oidbin, $folder, $id) = @_;
+	_lms_rw($self)->set_src($oidbin, $folder, $id);
+}
+
 sub _remove_if_local { # git->cat_async arg
 	my ($bref, $oidhex, $type, $size, $self) = @_;
 	$self->{im}->remove($bref) if $bref;
diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t
index 9531949a..1fe940bb 100644
--- a/t/lei-export-kw.t
+++ b/t/lei-export-kw.t
@@ -6,7 +6,6 @@ use File::Copy qw(cp);
 use File::Path qw(make_path);
 require_mods(qw(lei -imapd Mail::IMAPClient));
 my ($tmpdir, $for_destroy) = tmpdir;
-my ($ro_home, $cfg_path) = setup_public_inboxes;
 my $expect = eml_load('t/data/0001.patch');
 test_lei({ tmpdir => $tmpdir }, sub {
 	my $home = $ENV{HOME};
diff --git a/t/lei-refresh-mail-sync.t b/t/lei-refresh-mail-sync.t
new file mode 100644
index 00000000..ff558277
--- /dev/null
+++ b/t/lei-refresh-mail-sync.t
@@ -0,0 +1,67 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+require_mods(qw(lei));
+
+my $stop_daemon = sub { # needed since we don't have inotify
+	lei_ok qw(daemon-pid);
+	chomp(my $pid = $lei_out);
+	$pid > 0 or xbail "bad pid: $pid";
+	kill('TERM', $pid) or xbail "kill: $!";
+	for (0..10) {
+		tick;
+		kill(0, $pid) or last;
+	}
+	kill(0, $pid) and xbail "daemon still running (PID:$pid)";
+};
+
+test_lei({ daemon_only => 1 }, sub {
+	my $d = "$ENV{HOME}/d";
+	my ($ro_home, $cfg_path) = setup_public_inboxes;
+	lei_ok qw(daemon-pid);
+	lei_ok qw(add-external), "$ro_home/t2";
+	lei_ok qw(q mid:testmessage@example.com -o), "Maildir:$d";
+	my (@o) = glob("$d/*/*");
+	scalar(@o) == 1 or xbail('multiple results', \@o);
+	my ($bn0) = ($o[0] =~ m!/([^/]+)\z!);
+
+	my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+	lei_ok 'inspect', "blob:$oid";
+	my $before = json_utf8->decode($lei_out);
+	my $exp0 = { 'mail-sync' => { "maildir:$d" => [ $bn0 ] } };
+	is_deeply($before, $exp0, 'inspect shows expected');
+
+	$stop_daemon->();
+	my $dst = $o[0];
+	$dst =~ s/:2,.*\z// and $dst =~ s!/cur/!/new/! and
+		rename($o[0], $dst) or xbail "rename($o[0] => $dst): $!";
+
+	lei_ok 'inspect', "blob:$oid";
+	is_deeply(json_utf8->decode($lei_out),
+		$before, 'inspect unchanged immediately after restart');
+	lei_ok 'refresh-mail-sync', '--all';
+	lei_ok 'inspect', "blob:$oid";
+	my ($bn1) = ($dst =~ m!/([^/]+)\z!);
+	my $exp1 = { 'mail-sync' => { "maildir:$d" => [ $bn1 ] } };
+	is_deeply(json_utf8->decode($lei_out), $exp1,
+		'refresh-mail-sync updated location');
+
+	$stop_daemon->();
+	rename($dst, "$d/unwatched") or xbail "rename $dst out-of-the-way $!";
+
+	lei_ok 'refresh-mail-sync', $d;
+	lei_ok 'inspect', "blob:$oid";
+	is($lei_out, '{}', 'no known locations after "removal"');
+	lei_ok 'refresh-mail-sync', "Maildir:$d";
+
+	$stop_daemon->();
+	rename("$d/unwatched", $dst) or xbail "rename $dst back";
+
+	lei_ok 'refresh-mail-sync', "Maildir:$d";
+	lei_ok 'inspect', "blob:$oid";
+	is_deeply(json_utf8->decode($lei_out), $exp1,
+		'replaced file noted again');
+});
+
+done_testing;

  parent reply	other threads:[~2021-09-16  9:41 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-16  9:41 [PATCH 0/3] lei refresh-mail-sync Eric Wong
2021-09-16  9:41 ` [PATCH 1/3] lei: git_oid: replace git_blob_id Eric Wong
2021-09-16  9:41 ` Eric Wong [this message]
2021-09-16  9:46   ` [SQUASH] fix manifest Eric Wong
2021-09-16  9:41 ` [PATCH 3/3] net_reader: load IO::Socket::Socks in all workers Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210916094116.11457-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).