* [PATCH 1/3] lei: git_oid: replace git_blob_id
2021-09-16 9:41 [PATCH 0/3] lei refresh-mail-sync Eric Wong
@ 2021-09-16 9:41 ` Eric Wong
2021-09-16 9:41 ` [PATCH 2/3] lei refresh-mail-sync: replace prune-mail-sync Eric Wong
2021-09-16 9:41 ` [PATCH 3/3] net_reader: load IO::Socket::Socks in all workers Eric Wong
2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2021-09-16 9:41 UTC (permalink / raw)
To: meta
We'll be using binary SHA-1 and SHA-256 digests in memory since
that's what mail_sync.sqlite3 stores.
---
lib/PublicInbox/LEI.pm | 10 +++++++---
lib/PublicInbox/LeiInspect.pm | 3 ++-
lib/PublicInbox/LeiRemote.pm | 2 +-
lib/PublicInbox/LeiStore.pm | 9 +--------
lib/PublicInbox/LeiXSearch.pm | 3 ++-
5 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 0a30bc36..ec103231 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -24,6 +24,8 @@ use PublicInbox::DS qw(now dwaitpid);
use PublicInbox::Spawn qw(spawn popen_rd);
use PublicInbox::Lock;
use PublicInbox::Eml;
+use PublicInbox::Import;
+use PublicInbox::ContentHash qw(git_sha);
use Time::HiRes qw(stat); # ctime comparisons for config cache
use File::Path qw(mkpath);
use File::Spec;
@@ -1479,9 +1481,11 @@ sub refresh_watches {
}
}
-sub git_blob_id {
- my ($lei, $eml) = @_;
- ($lei->{sto} // _lei_store($lei, 1))->git_blob_id($eml);
+# TODO: support SHA-256
+sub git_oid {
+ my $eml = $_[-1];
+ $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
+ git_sha(1, $eml);
}
sub lms { # read-only LeiMailSync
diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
index 25bd47e7..2385f7f8 100644
--- a/lib/PublicInbox/LeiInspect.pm
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -202,7 +202,8 @@ sub ins_add { # InputPipe->consume callback
my $str = delete $lei->{istr};
$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
my $eml = PublicInbox::Eml->new(\$str);
- _inspect_argv($lei, [ 'blob:'.$lei->git_blob_id($eml),
+ _inspect_argv($lei, [
+ 'blob:'.$lei->git_oid($eml)->hexdigest,
map { "mid:$_" } @{mids($eml)} ]);
};
$lei->{istr} .= $_[1];
diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
index 580787c0..8d4ffed0 100644
--- a/lib/PublicInbox/LeiRemote.pm
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -32,7 +32,7 @@ sub _each_mboxrd_eml { # callback for MboxReader->mboxrd
$smsg = $res if ref($res) eq ref($smsg);
}
$smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
- : $lei->git_blob_id($eml);
+ : $lei->git_oid($eml)->hexdigest;
$smsg->populate($eml);
$smsg->{mid} //= '(none)';
push @{$self->{smsg}}, $smsg;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 42f574f2..e8bcb04e 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -20,7 +20,7 @@ use PublicInbox::Eml;
use PublicInbox::Import;
use PublicInbox::InboxWritable qw(eml_from_path);
use PublicInbox::V2Writable;
-use PublicInbox::ContentHash qw(content_hash git_sha);
+use PublicInbox::ContentHash qw(content_hash);
use PublicInbox::MID qw(mids);
use PublicInbox::LeiSearch;
use PublicInbox::MDA;
@@ -603,13 +603,6 @@ sub write_prepare {
$lei->{sto} = $self;
}
-# TODO: support SHA-256
-sub git_blob_id { # called via LEI->git_blob_id
- my ($self, $eml) = @_;
- $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
- git_sha(1, $eml)->hexdigest;
-}
-
# called by lei-daemon before lei->refresh_watches
sub add_sync_folders {
my ($self, @folders) = @_;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 556ffd58..50cadb5e 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -275,7 +275,8 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
$smsg->{kw} = []; # short-circuit xsmsg_vmd
}
}
- $smsg->{blob} //= $xoids ? (keys(%$xoids))[0] : $lei->git_blob_id($eml);
+ $smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
+ : $lei->git_oid($eml)->hexdigest;
_smsg_fill($smsg, $eml);
wait_startq($lei);
if ($lei->{-progress}) {
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] lei refresh-mail-sync: replace prune-mail-sync
2021-09-16 9:41 [PATCH 0/3] lei refresh-mail-sync Eric Wong
2021-09-16 9:41 ` [PATCH 1/3] lei: git_oid: replace git_blob_id Eric Wong
@ 2021-09-16 9:41 ` Eric Wong
2021-09-16 9:46 ` [SQUASH] fix manifest Eric Wong
2021-09-16 9:41 ` [PATCH 3/3] net_reader: load IO::Socket::Socks in all workers Eric Wong
2 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2021-09-16 9:41 UTC (permalink / raw)
To: meta
Merely pruning mail synchronization information was
insufficient for Maildir: renames are common in Maildir,
and we need to detect them after the fact when lei-daemon
isn't running.
Running this command could make "lei index" far more
useful...
---
MANIFEST | 2 +
lib/PublicInbox/LEI.pm | 3 +-
...PruneMailSync.pm => LeiRefreshMailSync.pm} | 36 +++++++---
lib/PublicInbox/LeiStore.pm | 5 ++
t/lei-export-kw.t | 1 -
t/lei-refresh-mail-sync.t | 67 +++++++++++++++++++
6 files changed, 103 insertions(+), 11 deletions(-)
rename lib/PublicInbox/{LeiPruneMailSync.pm => LeiRefreshMailSync.pm} (70%)
create mode 100644 t/lei-refresh-mail-sync.t
diff --git a/MANIFEST b/MANIFEST
index 640eabd1..221cb992 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -238,6 +238,7 @@ lib/PublicInbox/LeiPmdir.pm
lib/PublicInbox/LeiPruneMailSync.pm
lib/PublicInbox/LeiQuery.pm
lib/PublicInbox/LeiRediff.pm
+lib/PublicInbox/LeiRefreshMailSync.pm
lib/PublicInbox/LeiRemote.pm
lib/PublicInbox/LeiRm.pm
lib/PublicInbox/LeiRmWatch.pm
@@ -450,6 +451,7 @@ t/lei-q-kw.t
t/lei-q-remote-import.t
t/lei-q-save.t
t/lei-q-thread.t
+t/lei-refresh-mail-sync.t
t/lei-sigpipe.t
t/lei-tag.t
t/lei-up.t
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index ec103231..9794497b 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -263,7 +263,7 @@ our %CMD = ( # sorted in order of importance/use:
@net_opt, @c_opt ],
'forget-mail-sync' => [ 'LOCATION...',
'forget sync information for a mail folder', @c_opt ],
-'prune-mail-sync' => [ 'LOCATION...|--all',
+'refresh-mail-sync' => [ 'LOCATION...|--all',
'prune dangling sync data for a mail folder', 'all:s', @c_opt ],
'export-kw' => [ 'LOCATION...|--all',
'one-time export of keywords of sync sources',
@@ -616,6 +616,7 @@ sub pkt_ops {
$ops->{x_it} = [ \&x_it, $lei ];
$ops->{child_error} = [ \&child_error, $lei ];
$ops->{incr} = [ \&incr, $lei ];
+ $ops->{sto_done_request} = [ \&sto_done_request, $lei, $lei->{sock} ];
$ops;
}
diff --git a/lib/PublicInbox/LeiPruneMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
similarity index 70%
rename from lib/PublicInbox/LeiPruneMailSync.pm
rename to lib/PublicInbox/LeiRefreshMailSync.pm
index 3678bd04..07b0aa52 100644
--- a/lib/PublicInbox/LeiPruneMailSync.pm
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -1,16 +1,20 @@
# Copyright (C) 2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-# "lei prune-mail-sync" drops dangling sync information
-package PublicInbox::LeiPruneMailSync;
+# "lei refresh-mail-sync" drops dangling sync information
+# and attempts to detect moved files
+package PublicInbox::LeiRefreshMailSync;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::LeiExportKw;
use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::ContentHash qw(git_sha);
+use PublicInbox::Import;
sub eml_match ($$) {
my ($eml, $oidbin) = @_;
+ $eml->header_set($_) for @PublicInbox::Import::UNWANTED_HEADERS;
$oidbin eq git_sha(length($oidbin) == 20 ? 1 : 256, $eml)->digest;
}
@@ -20,7 +24,7 @@ sub prune_mdir { # lms->each_src callback
for my $d (@try) {
my $src = "$mdir/$d/$$id";
if ($self->{verify}) {
- my $eml = eml_from_path($src) or next;
+ my $eml = eml_from_path($src) // next;
return if eml_match($eml, $oidbin);
} elsif (-f $src) {
return;
@@ -38,12 +42,27 @@ sub prune_imap { # lms->each_src callback
$self->{lei}->{sto}->ipc_do('lms_clear_src', $url, $uid);
}
+# detects missed file moves
+sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
+ my ($self, $f, $fl) = @_;
+ my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
+ die "BUG: $f was not from a Maildir?";
+ substr($folder, 0, 0) = 'maildir:'; # add prefix
+ my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
+ return if defined($lms->name_oidbin($folder, $bn));
+ my $eml = eml_from_path($f) // return;
+ my $oidbin = $self->{lei}->git_oid($eml)->digest;
+ $self->{lei}->{sto}->ipc_do('lms_set_src', $oidbin, $folder, \$bn);
+}
+
sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
my ($self, $input, @args) = @_;
my $lms = $self->{-lms_ro} //= $self->{lei}->lms;
if ($input =~ /\Amaildir:(.+)/i) {
- my $mdir = $1;
- $lms->each_src($input, \&prune_mdir, $self, $mdir);
+ $lms->each_src($input, \&prune_mdir, $self, my $mdir = $1);
+ $self->{lse} //= $self->{lei}->{sto}->search;
+ # call pmdir_cb (via maildir_each_file -> each_mdir_fn)
+ PublicInbox::LeiInput::input_path_url($self, $input);
} elsif ($input =~ m!\Aimaps?://!i) {
my $uri = PublicInbox::URIimap->new($input);
my $mic = $self->{lei}->{net}->mic_for_folder($uri);
@@ -51,10 +70,10 @@ sub input_path_url { # overrides PublicInbox::LeiInput::input_path_url
$uids = +{ map { $_ => undef } @$uids };
$lms->each_src($$uri, \&prune_imap, $self, $uids, $$uri);
} else { die "BUG: $input not supported" }
- my $wait = $self->{lei}->{sto}->ipc_do('done');
+ $self->{lei}->{pkt_op_p}->pkt_do('sto_done_request');
}
-sub lei_prune_mail_sync {
+sub lei_refresh_mail_sync {
my ($lei, @folders) = @_;
my $sto = $lei->_lei_store or return $lei->fail(<<EOM);
lei/store uninitialized, see lei-import(1)
@@ -78,7 +97,6 @@ EOM
$self->prepare_inputs($lei, \@folders) or return;
my $j = $lei->{opt}->{jobs} || scalar(@{$self->{inputs}}) || 1;
my $ops = {};
- $sto->write_prepare($lei);
$lei->{auth}->op_merge($ops, $self) if $lei->{auth};
$self->{-wq_nr_workers} = $j // 1; # locked
(my $op_c, $ops) = $lei->workers_start($self, $j, $ops);
@@ -89,7 +107,7 @@ EOM
}
no warnings 'once';
-*_complete_prune_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
+*_complete_refresh_mail_sync = \&PublicInbox::LeiExportKw::_complete_export_kw;
*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child;
*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index e8bcb04e..32f55abd 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -293,6 +293,11 @@ sub set_sync_info {
_lms_rw($self)->set_src(pack('H*', $oidhex), $folder, $id);
}
+sub lms_set_src {
+ my ($self, $oidbin, $folder, $id) = @_;
+ _lms_rw($self)->set_src($oidbin, $folder, $id);
+}
+
sub _remove_if_local { # git->cat_async arg
my ($bref, $oidhex, $type, $size, $self) = @_;
$self->{im}->remove($bref) if $bref;
diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t
index 9531949a..1fe940bb 100644
--- a/t/lei-export-kw.t
+++ b/t/lei-export-kw.t
@@ -6,7 +6,6 @@ use File::Copy qw(cp);
use File::Path qw(make_path);
require_mods(qw(lei -imapd Mail::IMAPClient));
my ($tmpdir, $for_destroy) = tmpdir;
-my ($ro_home, $cfg_path) = setup_public_inboxes;
my $expect = eml_load('t/data/0001.patch');
test_lei({ tmpdir => $tmpdir }, sub {
my $home = $ENV{HOME};
diff --git a/t/lei-refresh-mail-sync.t b/t/lei-refresh-mail-sync.t
new file mode 100644
index 00000000..ff558277
--- /dev/null
+++ b/t/lei-refresh-mail-sync.t
@@ -0,0 +1,67 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+require_mods(qw(lei));
+
+my $stop_daemon = sub { # needed since we don't have inotify
+ lei_ok qw(daemon-pid);
+ chomp(my $pid = $lei_out);
+ $pid > 0 or xbail "bad pid: $pid";
+ kill('TERM', $pid) or xbail "kill: $!";
+ for (0..10) {
+ tick;
+ kill(0, $pid) or last;
+ }
+ kill(0, $pid) and xbail "daemon still running (PID:$pid)";
+};
+
+test_lei({ daemon_only => 1 }, sub {
+ my $d = "$ENV{HOME}/d";
+ my ($ro_home, $cfg_path) = setup_public_inboxes;
+ lei_ok qw(daemon-pid);
+ lei_ok qw(add-external), "$ro_home/t2";
+ lei_ok qw(q mid:testmessage@example.com -o), "Maildir:$d";
+ my (@o) = glob("$d/*/*");
+ scalar(@o) == 1 or xbail('multiple results', \@o);
+ my ($bn0) = ($o[0] =~ m!/([^/]+)\z!);
+
+ my $oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
+ lei_ok 'inspect', "blob:$oid";
+ my $before = json_utf8->decode($lei_out);
+ my $exp0 = { 'mail-sync' => { "maildir:$d" => [ $bn0 ] } };
+ is_deeply($before, $exp0, 'inspect shows expected');
+
+ $stop_daemon->();
+ my $dst = $o[0];
+ $dst =~ s/:2,.*\z// and $dst =~ s!/cur/!/new/! and
+ rename($o[0], $dst) or xbail "rename($o[0] => $dst): $!";
+
+ lei_ok 'inspect', "blob:$oid";
+ is_deeply(json_utf8->decode($lei_out),
+ $before, 'inspect unchanged immediately after restart');
+ lei_ok 'refresh-mail-sync', '--all';
+ lei_ok 'inspect', "blob:$oid";
+ my ($bn1) = ($dst =~ m!/([^/]+)\z!);
+ my $exp1 = { 'mail-sync' => { "maildir:$d" => [ $bn1 ] } };
+ is_deeply(json_utf8->decode($lei_out), $exp1,
+ 'refresh-mail-sync updated location');
+
+ $stop_daemon->();
+ rename($dst, "$d/unwatched") or xbail "rename $dst out-of-the-way $!";
+
+ lei_ok 'refresh-mail-sync', $d;
+ lei_ok 'inspect', "blob:$oid";
+ is($lei_out, '{}', 'no known locations after "removal"');
+ lei_ok 'refresh-mail-sync', "Maildir:$d";
+
+ $stop_daemon->();
+ rename("$d/unwatched", $dst) or xbail "rename $dst back";
+
+ lei_ok 'refresh-mail-sync', "Maildir:$d";
+ lei_ok 'inspect', "blob:$oid";
+ is_deeply(json_utf8->decode($lei_out), $exp1,
+ 'replaced file noted again');
+});
+
+done_testing;
^ permalink raw reply related [flat|nested] 5+ messages in thread