"lei import" now tracks IMAP and Maildir source information in preparation for propagating keywords back to IMAP|Maildir. "lei inspect" is a long-overdue debug/diagnostic thing. Eric Wong (7): lei_input: drop outdated comment w.r.t. compression t/lei_to_mail: split "lei import" test $HOME directory URIimap: support ->uidvalidity and ->iuid net_reader: imap_each: add UIDVALIDITY to URL arg doc: lei_design_notes: add a bit on WAL usage lei_mail_sync: for bidirectional keyword sync lei import: keep sync info for Maildir and IMAP folders Documentation/lei_design_notes.txt | 12 ++ MANIFEST | 3 + lib/PublicInbox/LEI.pm | 16 ++- lib/PublicInbox/LeiImport.pm | 22 ++- lib/PublicInbox/LeiInput.pm | 42 +++++- lib/PublicInbox/LeiInspect.pm | 96 +++++++++++++ lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++ lib/PublicInbox/LeiSearch.pm | 7 + lib/PublicInbox/LeiStore.pm | 20 ++- lib/PublicInbox/NetReader.pm | 11 +- lib/PublicInbox/TestCommon.pm | 2 + lib/PublicInbox/URIimap.pm | 38 +++++- t/lei-import-imap.t | 27 +++- t/lei-import-maildir.t | 21 +++ t/lei_mail_sync.t | 68 ++++++++++ t/lei_to_mail.t | 8 +- t/net_reader-imap.t | 4 +- t/uri_imap.t | 32 ++++- 18 files changed, 612 insertions(+), 28 deletions(-) create mode 100644 lib/PublicInbox/LeiInspect.pm create mode 100644 lib/PublicInbox/LeiMailSync.pm create mode 100644 t/lei_mail_sync.t
Followup-to: 49b036771ef3bf45 ("lei_input: support compressed mboxes") --- lib/PublicInbox/LeiInput.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index de60a076..0114f5ee 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -50,7 +50,6 @@ sub check_input_format ($;$) { return 1 if $fmt eq 'eml'; require PublicInbox::MboxLock if $files; require PublicInbox::MboxReader; - # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail PublicInbox::MboxReader->reads($fmt) or return $lei->fail("--$opt_key=$fmt unrecognized"); 1;
"lei import" behavior may change w.r.t. keyword handling. Use separate $HOME between different test_lei to ensure isolation between the tests. --- lib/PublicInbox/TestCommon.pm | 2 ++ t/lei_to_mail.t | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index b5d0b9f8..49cecacd 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -517,6 +517,7 @@ SKIP: { require_git(2.6, 1) or skip('git 2.6+ required for lei test', 2); require_mods(qw(json DBD::SQLite Search::Xapian), 2); require PublicInbox::Config; + require File::Path; local %ENV = %ENV; delete $ENV{XDG_DATA_HOME}; delete $ENV{XDG_CONFIG_HOME}; @@ -534,6 +535,7 @@ EOM $lei_opt = { 1 => \$lei_out, 2 => \$lei_err }; my ($daemon_pid, $for_destroy, $daemon_xrd); my $tmpdir = $test_opt->{tmpdir}; + File::Path::mkpath($tmpdir) if (defined $tmpdir && !-d $tmpdir); ($tmpdir, $for_destroy) = tmpdir unless $tmpdir; state $persist_xrd = $ENV{TEST_LEI_DAEMON_PERSIST_DIR}; SKIP: { diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t index 51357257..32532a98 100644 --- a/t/lei_to_mail.t +++ b/t/lei_to_mail.t @@ -129,9 +129,9 @@ my $orig = do { $raw; }; -test_lei(sub { - ok(lei(qw(import -F), $mbox, $fn), 'imported mbox'); - ok(lei(qw(q s:x)), 'lei q works') or diag $lei_err; +test_lei({tmpdir => "$tmpdir/using -F"}, sub { + lei_ok(qw(import -F), $mbox, $fn, \'imported mbox'); + lei_ok(qw(q s:x), \'lei q works') or diag $lei_err; my $res = json_utf8->decode($lei_out); my $x = $res->[0]; is($x->{'s'}, 'x', 'subject imported') or diag $lei_out; @@ -139,7 +139,7 @@ test_lei(sub { is($res->[1], undef, 'only one result'); }); -test_lei(sub { +test_lei({tmpdir => "$tmpdir/using TYPE: prefix"}, sub { lei_ok('import', "$mbox:$fn", \'imported mbox:/path') or diag $lei_err; lei_ok(qw(q s:x), \'lei q works') or diag $lei_err; my $res = json_utf8->decode($lei_out);
These will be useful for keyword synchronization, and perhaps importing a single IMAP message with ->iuid. --- lib/PublicInbox/URIimap.pm | 38 ++++++++++++++++++++++++++++++++++---- t/uri_imap.t | 32 +++++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/URIimap.pm b/lib/PublicInbox/URIimap.pm index db84ee5e..dc193468 100644 --- a/lib/PublicInbox/URIimap.pm +++ b/lib/PublicInbox/URIimap.pm @@ -5,8 +5,9 @@ # This depends only on the documented public API of the `URI' dist, # not on internal `_'-prefixed subclasses such as `URI::_server' # -# <https://metacpan.org/pod/URI::imap> exists, but it's not in -# common distros. +# <https://metacpan.org/pod/URI::imap> exists, but it appears +# unmaintained, isn't in common distros, nor does it support +# ';FOO=BAR' parameters such as UIDVALIDITY # # RFC 2192 also describes ";TYPE=<list_type>" package PublicInbox::URIimap; @@ -56,7 +57,7 @@ sub path { my ($self) = @_; my (undef, undef, $path) = uri_split($$self); $path =~ s!\A/+!!; - $path =~ s/;.*\z//; # ;UIDVALIDITY=nz-number + $path =~ s![/;].*\z!!; # [;UIDVALIDITY=nz-number]/;UID=nz-number $path eq '' ? undef : $path; } @@ -66,7 +67,36 @@ sub mailbox { defined($path) ? uri_unescape($path) : undef; } -# TODO: UIDVALIDITY, search, and other params +sub uidvalidity { # read/write + my ($self, $val) = @_; + my ($scheme, $auth, $path, $query, $frag) = uri_split($$self); + if (defined $val) { + if ($path =~ s!;UIDVALIDITY=[^;/]*\b!;UIDVALIDITY=$val!i or + $path =~ s!/;!;UIDVALIDITY=$val/;!i) { + # s// already changed it + } else { # both s// failed, so just append + $path .= ";UIDVALIDITY=$val"; + } + $$self = uri_join($scheme, $auth, $path, $query, $frag); + } + $path =~ s!\A/+!!; + $path =~ m!\A[^;/]+;UIDVALIDITY=([1-9][0-9]*)\b!i ? 
($1 + 0) : undef; +} + +sub iuid { + my ($self, $val) = @_; + my ($scheme, $auth, $path, $query, $frag) = uri_split($$self); + if (defined $val) { + if ($path =~ s!/;UID=[^;/]*\b!/;UID=$val!i) { + # s// already changed it + } else { # both s// failed, so just append + $path .= ";UID=$val"; + } + $$self = uri_join($scheme, $auth, $path, $query); + } + $path =~ m!\A/[^/;]+(?:;UIDVALIDITY=[^;/]+)?/;UID=([1-9][0-9]*)\b!i ? + ($1 + 0) : undef; +} sub port { my ($self) = @_; diff --git a/t/uri_imap.t b/t/uri_imap.t index f7c78665..e2aadf84 100644 --- a/t/uri_imap.t +++ b/t/uri_imap.t @@ -54,6 +54,7 @@ is(PublicInbox::URIimap->new('imaps://0:993/')->canonical->as_string, $uri = PublicInbox::URIimap->new('imap://NSA:Hunter2@0/INBOX'); is($uri->user, 'NSA'); is($uri->password, 'Hunter2'); +is($uri->uidvalidity, undef, 'no UIDVALIDITY'); $uri = PublicInbox::URIimap->new('imap://0/%'); is($uri->mailbox, '%', "RFC 2192 '%' supported"); @@ -61,6 +62,35 @@ $uri = PublicInbox::URIimap->new('imap://0/%25'); $uri = PublicInbox::URIimap->new('imap://0/*'); is($uri->mailbox, '*', "RFC 2192 '*' supported"); -# TODO: support UIDVALIDITY and other params +$uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=1'); +is($uri->mailbox, 'mmm', 'mailbox works with UIDVALIDITY'); +is($uri->uidvalidity, 1, 'single-digit UIDVALIDITY'); +$uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=21'); +is($uri->uidvalidity, 21, 'multi-digit UIDVALIDITY'); +$uri = PublicInbox::URIimap->new('imap://0/mmm;UIDVALIDITY=bogus'); +is($uri->uidvalidity, undef, 'bogus UIDVALIDITY'); +is($uri->uidvalidity(2), 2, 'iuid set'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=2', 'bogus uidvalidity replaced'); +is($uri->uidvalidity(13), 13, 'iuid set'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=13', 'valid uidvalidity replaced'); + +$uri = PublicInbox::URIimap->new('imap://0/mmm'); +is($uri->uidvalidity(2), 2, 'iuid set'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=2', 'uidvalidity appended'); +is($uri->iuid, undef, 'no 
iuid'); + +$uri = PublicInbox::URIimap->new('imap://0/mmm/;uid=8'); +is($uri->mailbox, 'mmm', 'mailbox works with iuid'); +is($uri->iuid, 8, 'iuid extracted'); +is($uri->iuid(9), 9, 'iuid set'); +is($$uri, 'imap://0/mmm/;UID=9', 'correct iuid when stringified'); +is($uri->uidvalidity(1), 1, 'set uidvalidity with iuid'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=1/;UID=9', + 'uidvalidity added with iuid'); +is($uri->uidvalidity(4), 4, 'set uidvalidity with iuid'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=4/;UID=9', + 'uidvalidity replaced with iuid'); +is($uri->iuid(3), 3, 'iuid set with uidvalidity'); +is($$uri, 'imap://0/mmm;UIDVALIDITY=4/;UID=3', 'iuid replaced properly'); done_testing;
This will allow the callback to reliably maintain OID <=> UID mappings between lei/store and the IMAP folder. --- lib/PublicInbox/NetReader.pm | 11 +++++++---- t/net_reader-imap.t | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index 39129b34..5978752f 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -347,7 +347,7 @@ sub errors { } sub _imap_do_msg ($$$$$) { - my ($self, $uri, $uid, $raw, $flags) = @_; + my ($self, $url, $uid, $raw, $flags) = @_; # our target audience expects LF-only, save storage $$raw =~ s/\r\n/\n/sg; my $kw = []; @@ -358,12 +358,12 @@ sub _imap_do_msg ($$$$$) { } elsif ($f eq "\\Deleted") { # not in JMAP return; } elsif ($self->{verbose}) { - warn "# unknown IMAP flag $f <$uri;uid=$uid>\n"; + warn "# unknown IMAP flag $f <$url/;UID=$uid>\n"; } } @$kw = sort @$kw; # for all UI/UX purposes my ($eml_cb, @args) = @{$self->{eml_each}}; - $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args); + $eml_cb->($url, $uid, $kw, PublicInbox::Eml->new($raw), @args); } sub run_commit_cb ($) { @@ -396,6 +396,9 @@ sub _imap_fetch_all ($$$) { return "E: $uri cannot get UIDVALIDITY"; $r_uidnext //= $mic->uidnext($mbx) // return "E: $uri cannot get UIDNEXT"; + my $url = ref($uri)->new($$uri); + $url->uidvalidity($r_uidval); + $url = $$url; my $itrk = _itrk($self, $uri); my $l_uid; $l_uid = $itrk->get_last($r_uidval) if $itrk; @@ -455,7 +458,7 @@ sub _imap_fetch_all ($$$) { # messages get deleted, so holes appear my $per_uid = delete $r->{$uid} // next; my $raw = delete($per_uid->{$key}) // next; - _imap_do_msg($self, $uri, $uid, \$raw, + _imap_do_msg($self, $url, $uid, \$raw, $per_uid->{FLAGS}); $last_uid = $uid; last if $self->{quit}; diff --git a/t/net_reader-imap.t b/t/net_reader-imap.t index e478ee07..5de8f92b 100644 --- a/t/net_reader-imap.t +++ b/t/net_reader-imap.t @@ -33,7 +33,9 @@ is(scalar(@w), 0, 'no warnings'); ok($nr, 'got some 
emails'); is($eml{'PublicInbox::Eml'}, $nr, 'got expected Eml objects'); is(scalar keys %eml, 1, 'only got Eml objects'); -is($urls{$url}, $nr, 'one URL expected number of times'); +is(scalar(grep(/\A\Q$url\E;UIDVALIDITY=\d+\z/, keys %urls)), scalar(keys %urls), + 'UIDVALIDITY added to URL passed to callback'); +is_deeply([values %urls], [$nr], 'one URL expected number of times'); is(scalar keys %urls, 1, 'only got one URL'); is($args{blah}, $nr, 'got arg expected number of times'); is(scalar keys %args, 1, 'only got one arg');
--- Documentation/lei_design_notes.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/lei_design_notes.txt b/Documentation/lei_design_notes.txt index a5606c05..f1d2ab6f 100644 --- a/Documentation/lei_design_notes.txt +++ b/Documentation/lei_design_notes.txt @@ -18,3 +18,15 @@ SQLite, and Xapian across multiple processes. The coupling of IMAP and NNTP network latency to local storage is a current weakness of public-inbox-watch. Therefore, -watch will likely adopt the daemon architecture of lei in the future. + +Read/write vs read-only storage +------------------------------- + +public-inboxes are intended to be written and read by different +Unix users. Commonly, a single Unix user or group will write to +a public-inbox, but the inbox will be served by a user with +read-only permissions (e.g. "www-data" or "nobody"). + +lei/store is intended to be read and written by a single user, +thus we can rely on the Write-Ahead-Log journal of SQLite to +improve performance: <https://sqlite.org/wal.html>
We'll be using the new class to efficiently propagate keyword changes from lei/store back to Maildir or IMAP folders. --- MANIFEST | 2 + lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++++++ t/lei_mail_sync.t | 68 +++++++++++ 3 files changed, 281 insertions(+) create mode 100644 lib/PublicInbox/LeiMailSync.pm create mode 100644 t/lei_mail_sync.t diff --git a/MANIFEST b/MANIFEST index e0f9c35b..abaf54b0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -201,6 +201,7 @@ lib/PublicInbox/LeiInit.pm lib/PublicInbox/LeiInput.pm lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiLsSearch.pm +lib/PublicInbox/LeiMailSync.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm @@ -407,6 +408,7 @@ t/lei-tag.t t/lei.t t/lei_dedupe.t t/lei_external.t +t/lei_mail_sync.t t/lei_overview.t t/lei_saved_search.t t/lei_store.t diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm new file mode 100644 index 00000000..52f26d69 --- /dev/null +++ b/lib/PublicInbox/LeiMailSync.pm @@ -0,0 +1,211 @@ +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP +package PublicInbox::LeiMailSync; +use strict; +use v5.10.1; +use DBI; + +sub dbh_new { + my ($self, $rw) = @_; + my $f = $self->{filename}; + my $creat; + if (!-f $f && $rw) { + require PublicInbox::Spawn; + open my $fh, '+>>', $f or die "failed to open $f: $!"; + PublicInbox::Spawn::nodatacow_fd(fileno($fh)); + $creat = 1; + } + my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { + AutoCommit => 1, + RaiseError => 1, + PrintError => 0, + ReadOnly => !$rw, + sqlite_use_immediate_transaction => 1, + }); + # no sqlite_unicode, here, all strings are binary + create_tables($dbh) if $rw; + $dbh->do('PRAGMA journal_mode = WAL') if $creat; + $dbh->do('PRAGMA case_sensitive_like = ON'); + $dbh; +} + +sub new { + my ($cls, $f) = @_; + 
bless { filename => $f, fmap => {} }, $cls; +} + +sub lms_commit { delete($_[0]->{dbh})->commit } + +sub lms_begin { ($_[0]->{dbh} //= dbh_new($_[0], 1))->begin_work }; + +sub create_tables { + my ($dbh) = @_; + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS folders ( + fid INTEGER PRIMARY KEY, + loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */ + UNIQUE (loc) +) + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS blob2num ( + oidbin VARBINARY NOT NULL, + fid INTEGER NOT NULL, /* folder ID */ + uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */ + UNIQUE (oidbin, fid, uid) +) + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS blob2name ( + oidbin VARBINARY NOT NULL, + fid INTEGER NOT NULL, /* folder ID */ + name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */ + UNIQUE (oidbin, fid, name) +) + +} + +sub _fid_for { + my ($self, $folder, $rw) = @_; + my $dbh = $self->{dbh}; + my ($row) = $dbh->selectrow_array(<<'', undef, $folder); +SELECT fid FROM folders WHERE loc = ? LIMIT 1 + + return $row if defined $row; + return unless $rw; + + ($row) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders'); + + my $fid = ($row // 0) + 1; + # in case we're reusing, clobber existing stale refs: + $dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid); + $dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid); + + my $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)'); + $sth->execute($fid, $folder); + + $fid; +} + +sub set_src { + my ($self, $oidhex, $folder, $id) = @_; + my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1); + my $sth; + if (ref($id)) { # scalar name + $id = $$id; + $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?) + + } else { # numeric ID (IMAP UID, MH number) + $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?) 
+ + } + $sth->execute(pack('H*', $oidhex), $fid, $id); +} + +sub clear_src { + my ($self, $folder, $id) = @_; + my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1); + my $sth; + if (ref($id)) { # scalar name + $id = $$id; + $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM blob2name WHERE fid = ? AND name = ? + + } else { + $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM blob2num WHERE fid = ? AND uid = ? + + } + $sth->execute($fid, $id); +} + +# read-only, iterates every oidbin + UID or name for a given folder +sub each_src { + my ($self, $folder, $cb, @args) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my ($fid, $sth); + $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return; + $sth = $dbh->prepare('SELECT oidbin,uid FROM blob2num WHERE fid = ?'); + $sth->execute($fid); + while (my ($oidbin, $id) = $sth->fetchrow_array) { + $cb->($oidbin, $id, @args); + } + $sth = $dbh->prepare('SELECT oidbin,name FROM blob2name WHERE fid = ?'); + $sth->execute($fid); + while (my ($oidbin, $id) = $sth->fetchrow_array) { + $cb->($oidbin, \$id, @args); + } +} + +sub location_stats { + my ($self, $folder, $cb, @args) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my $fid; + my $ret = {}; + $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return; + my ($row) = $dbh->selectrow_array(<<"", undef, $fid); +SELECT COUNT(name) FROM blob2name WHERE fid = ? + + $ret->{'name.count'} = $row if $row; + for my $op (qw(count min max)) { + ($row) = $dbh->selectrow_array(<<"", undef, $fid); +SELECT $op(uid) FROM blob2num WHERE fid = ? 
+ + $row or last; + $ret->{"uid.$op"} = $row; + } + $ret; +} + +# returns a { location => [ list-of-ids-or-names ] } mapping +sub locations_for { + my ($self, $oidhex) = @_; + my ($fid, $sth, $id, %fid2id); + my $dbh = $self->{dbh} //= dbh_new($self); + $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?'); + $sth->execute(pack('H*', $oidhex)); + while (my ($fid, $uid) = $sth->fetchrow_array) { + push @{$fid2id{$fid}}, $uid; + } + $sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?'); + $sth->execute(pack('H*', $oidhex)); + while (my ($fid, $name) = $sth->fetchrow_array) { + push @{$fid2id{$fid}}, $name; + } + $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1'); + my $ret = {}; + while (my ($fid, $ids) = each %fid2id) { + $sth->execute($fid); + my ($loc) = $sth->fetchrow_array; + unless (defined $loc) { + warn "E: fid=$fid for $oidhex unknown:\n", map { + 'E: '.(ref() ? $$_ : "#$_")."\n"; + } @$ids; + next; + } + $ret->{$loc} = $ids; + } + scalar(keys %$ret) ? $ret : undef; +} + +# returns a list of folders used for completion +sub folders { + my ($self, $pfx) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my $sql = 'SELECT loc FROM folders'; + my @pfx; + if (defined $pfx) { + $sql .= ' WHERE loc LIKE ? 
ESCAPE ?'; + @pfx = ($pfx, '\\'); + $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars + $pfx[0] .= '%'; + } + map { $_->[0] } @{$dbh->selectall_arrayref($sql, undef, @pfx)}; +} + +1; diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t new file mode 100644 index 00000000..864d6e48 --- /dev/null +++ b/t/lei_mail_sync.t @@ -0,0 +1,68 @@ +#!perl -w +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use v5.10.1; +use PublicInbox::TestCommon; +require_mods(qw(DBD::SQLite)); +require_ok 'PublicInbox::LeiMailSync'; +my ($dir, $for_destroy) = tmpdir(); +my $lms = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); + +$lms->lms_begin; +$lms->lms_commit; +my $ro = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); +is_deeply([$ro->folders], [], 'no folders, yet'); + +my $imap = 'imaps://bob@[::1]/INBOX;UIDVALIDITY=9'; +$lms->lms_begin; +is($lms->set_src('deadbeef', $imap, 1), 1, 'set IMAP once'); +ok($lms->set_src('deadbeef', $imap, 1) == 0, 'set IMAP idempotently'); +$lms->lms_commit; +is_deeply([$ro->folders], [$imap], 'IMAP folder added'); +is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB'); +is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap], + 'IMAP folder with partial GLOB'); + +is_deeply($ro->locations_for('deadbeef'), + { $imap => [ 1 ] }, 'locations_for w/ imap'); + +my $maildir = 'maildir:/home/user/md'; +my $fname = 'foo:2,S'; +$lms->lms_begin; +ok($lms->set_src('deadbeef', $maildir, \$fname), 'set Maildir once'); +ok($lms->set_src('deadbeef', $maildir, \$fname) == 0, 'set Maildir again'); +$lms->lms_commit; +is_deeply($ro->locations_for('deadbeef'), + { $imap => [ 1 ], $maildir => [ $fname ] }, + 'locations_for w/ maildir + imap'); + +is_deeply([sort($ro->folders)], [$imap, $maildir], 'both folders shown'); +my @res; +$ro->each_src($maildir, sub { + my ($oidbin, $id) = @_; + push @res, [ unpack('H*', $oidbin), $id ]; +}); +is_deeply(\@res, [ ['deadbeef', 
\$fname] ], 'each_src works on Maildir'); + +@res = (); +$ro->each_src($imap, sub { + my ($oidbin, $id) = @_; + push @res, [ unpack('H*', $oidbin), $id ]; +}); +is_deeply(\@res, [ ['deadbeef', 1] ], 'each_src works on IMAP'); + +is_deeply($ro->location_stats($maildir), { 'name.count' => 1 }, + 'Maildir location stats'); +is_deeply($ro->location_stats($imap), + { 'uid.count' => 1, 'uid.max' => 1, 'uid.min' => 1 }, + 'IMAP location stats'); +$lms->lms_begin; +is($lms->clear_src($imap, 1), 1, 'clear_src on IMAP'); +is($lms->clear_src($maildir, \$fname), 1, 'clear_src on Maildir'); +ok($lms->clear_src($imap, 1) == 0, 'clear_src again on IMAP'); +ok($lms->clear_src($maildir, \$fname) == 0, 'clear_src again on Maildir'); +$lms->lms_commit; +is_deeply($ro->location_stats($maildir), {}, 'nothing left'); + +done_testing;
We aren't using it, yet, but the plan is to be able to use this information to propagate keyword changes back to IMAP and Maildir folders using some to-be-implemented command. "lei inspect" is a half-baked new command to make testing this change easier. It will be updated to support more SQLite+Xapian introspection duties in the future, including public-inbox things independent of lei. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 16 ++++-- lib/PublicInbox/LeiImport.pm | 22 ++++++-- lib/PublicInbox/LeiInput.pm | 41 +++++++++++++-- lib/PublicInbox/LeiInspect.pm | 96 +++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiSearch.pm | 7 +++ lib/PublicInbox/LeiStore.pm | 20 +++++++- t/lei-import-imap.t | 27 +++++++++- t/lei-import-maildir.t | 21 ++++++++ 9 files changed, 238 insertions(+), 13 deletions(-) create mode 100644 lib/PublicInbox/LeiInspect.pm diff --git a/MANIFEST b/MANIFEST index abaf54b0..79d393c5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -199,6 +199,7 @@ lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiInit.pm lib/PublicInbox/LeiInput.pm +lib/PublicInbox/LeiInspect.pm lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiLsSearch.pm lib/PublicInbox/LeiMailSync.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 9f49fc03..39278de6 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -64,9 +64,13 @@ sub opt_dash ($$) { ($spec, '<>' => $cb, $GLP_PASS) # for Getopt::Long } -sub rel2abs ($$) { +# rel2abs preserves symlinks in parent, unlike abs_path +sub rel2abs { my ($self, $p) = @_; - return $p if index($p, '/') == 0; # already absolute + if (index($p, '/') == 0) { # already absolute + $p =~ tr!/!/!s; # squeeze redundant slashes + return $p; + } my $pwd = $self->{env}->{PWD}; my $cwd; if (defined $pwd) { @@ -84,6 +88,9 @@ sub rel2abs ($$) { File::Spec->rel2abs($p, $pwd); } +# abs_path resolves symlinks in parent iff all parents exist +sub abs_path { Cwd::abs_path($_[1]) // rel2abs(@_) } + sub share_path ($) { 
# $HOME/.local/share/lei/$FOO my ($self) = @_; rel2abs($self, ($self->{env}->{XDG_DATA_HOME} // @@ -193,7 +200,7 @@ our %CMD = ( # sorted in order of importance/use: 'import' => [ 'LOCATION...|--stdin', 'one-time import/update from URL or filesystem', qw(stdin| offset=i recursive|r exclude=s include|I=s - lock=s@ in-format|F=s kw! verbose|v+ incremental!), @c_opt ], + lock=s@ in-format|F=s kw! verbose|v+ incremental! sync!), @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s @@ -205,6 +212,9 @@ our %CMD = ( # sorted in order of importance/use: 'git-config(1) wrapper for '._config_path($_[0]); }, qw(config-file|system|global|file|f=s), # for conflict detection qw(c=s@ C=s@), pass_through('git config') ], +'inspect' => [ 'ITEMS...', 'inspect lei/store and/or local external', + qw(pretty ascii dir=s), @c_opt ], + 'init' => [ '[DIRNAME]', sub { "initialize storage, default: ".store_path($_[0]); }, @c_opt ], diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index e3c756e8..daaa6753 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -13,7 +13,6 @@ sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml, $vmd) = @_; my $xoids = $self->{lei}->{ale}->xoids_for($eml); if (my $all_vmd = $self->{all_vmd}) { - $vmd //= {}; @$vmd{keys %$all_vmd} = values %$all_vmd; } $self->{lei}->{sto}->ipc_do('set_eml', $eml, $vmd, $xoids); @@ -31,11 +30,26 @@ sub input_mbox_cb { # MboxReader callback sub input_maildir_cb { # maildir_each_eml cb my ($f, $kw, $eml, $self) = @_; - input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef); + my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; + if ($self->{-mail_sync}) { + if ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) { # ugh... 
+ $vmd->{sync_info} = [ "maildir:$1", \(my $n = $2) ]; + } else { + warn "E: $f was not from a Maildir?\n"; + } + } + input_eml_cb($self, $eml, $vmd); } -sub input_net_cb { # imap_each, nntp_each cb +sub input_imap_cb { # imap_each my ($url, $uid, $kw, $eml, $self) = @_; + my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; + $vmd->{sync_info} = [ $url, $uid ] if $self->{-mail_sync}; + input_eml_cb($self, $eml, $vmd); +} + +sub input_nntp_cb { # nntp_each + my ($url, $num, $kw, $eml, $self) = @_; input_eml_cb($self, $eml, $self->{-import_kw} ? { kw => $kw } : undef); } @@ -61,6 +75,8 @@ sub lei_import { # the main "lei import" method return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; $self->{all_vmd} = $vmd_mod if scalar keys %$vmd_mod; $self->prepare_inputs($lei, \@inputs) or return; + $self->{-mail_sync} = $lei->{opt}->{sync} // 1; + $lei->ale; # initialize for workers to read my $j = $lei->{opt}->{jobs} // scalar(@{$self->{inputs}}) || 1; if (my $net = $lei->{net}) { diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 0114f5ee..d11d23d4 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -83,11 +83,13 @@ sub input_path_url { my $ifmt = lc($lei->{opt}->{'in-format'} // ''); # TODO auto-detect? if ($input =~ m!\Aimaps?://!i) { - $lei->{net}->imap_each($input, $self->can('input_net_cb'), + $lei->{net}->imap_each($input, $self->can('input_imap_cb') // + $self->can('input_net_cb'), $self, @args); return; } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { - $lei->{net}->nntp_each($input, $self->can('input_net_cb'), + $lei->{net}->nntp_each($input, $self->can('input_nntp_cb') // + $self->can('input_net_cb'), $self, @args); return; } @@ -130,11 +132,13 @@ EOM sub prepare_inputs { # returns undef on error my ($self, $lei, $inputs) = @_; my $in_fmt = $lei->{opt}->{'in-format'}; + my $sync = $lei->{opt}->{sync} ? 
{} : undef; # using LeiMailSync if ($lei->{opt}->{stdin}) { @$inputs and return $lei->fail("--stdin and @$inputs do not mix"); check_input_format($lei) or return; push @$inputs, '/dev/stdin'; + push @{$sync->{no}}, '/dev/stdin' if $sync; } my $net = $lei->{net}; # NetWriter may be created by l2m my (@f, @d); @@ -145,6 +149,13 @@ sub prepare_inputs { # returns undef on error require PublicInbox::NetReader; $net //= PublicInbox::NetReader->new; $net->add_url($input); + if ($sync) { + if ($input =~ m!\Aimaps?://!) { + push @{$sync->{ok}}, $input; + } else { + push @{$sync->{no}}, $input; + } + } } elsif ($input_path =~ s/\A([a-z0-9]+)://is) { my $ifmt = lc $1; if (($in_fmt // $ifmt) ne $ifmt) { @@ -152,6 +163,13 @@ sub prepare_inputs { # returns undef on error --in-format=$in_fmt and `$ifmt:' conflict } + if ($sync) { + if ($ifmt =~ /\A(?:maildir|mh)\z/i) { + push @{$sync->{ok}}, $input; + } else { + push @{$sync->{no}}, $input; + } + } my $devfd = $lei->path_to_fd($input_path) // return; if ($devfd >= 0 || (-f $input_path || -p _)) { require PublicInbox::MboxLock; @@ -162,6 +180,7 @@ sub prepare_inputs { # returns undef on error require PublicInbox::MdirReader; $ifmt eq 'maildir' or return $lei->fail("$ifmt not supported"); + $input = $lei->abs_path($input) if $sync; } else { return $lei->fail("Unable to handle $input"); } @@ -170,12 +189,18 @@ sub prepare_inputs { # returns undef on error $input is `eml', not --in-format=$in_fmt require PublicInbox::Eml; + push @{$sync->{no}}, $input if $sync; } else { my $devfd = $lei->path_to_fd($input) // return; if ($devfd >= 0 || -f $input || -p _) { - push @f, $input + push @{$sync->{no}}, $input if $sync; + push @f, $input; } elsif (-d $input) { - push @d, $input + if ($sync) { + $input = $lei->abs_path($input); + push @{$sync->{ok}}, $input; + } + push @d, $input; } else { return $lei->fail("Unable to handle $input") } @@ -185,6 +210,14 @@ $input is `eml', not --in-format=$in_fmt if (@d) { # TODO: check for MH vs Maildir, 
here require PublicInbox::MdirReader; } + if ($sync && $sync->{no}) { + return $lei->fail(<<"") if !$sync->{ok}; +--sync specified but no inputs support it + + # non-fatal if some inputs support sync + $lei->err("# --sync will only be used for @{$sync->{ok}}"); + $lei->err("# --sync is not supported for: @{$sync->{no}}"); + } if ($net) { if (my $err = $net->errors) { return $lei->fail($err); diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm new file mode 100644 index 00000000..6cfc8083 --- /dev/null +++ b/lib/PublicInbox/LeiInspect.pm @@ -0,0 +1,96 @@ +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# "lei inspect" general purpose inspector for stuff in SQLite and +# Xapian. Will eventually be useful with plain public-inboxes, +# not just lei/store. This is totally half-baked at the moment +# but useful for testing. +package PublicInbox::LeiInspect; +use strict; +use v5.10.1; +use PublicInbox::Config; + +sub inspect_blob ($$) { + my ($lei, $oidhex) = @_; + my $ent = {}; + if (my $lse = $lei->{lse}) { + my @docids = $lse ? $lse->over->blob_exists($oidhex) : (); + $ent->{'lei/store'} = \@docids if @docids; + my $lms = $lse->lms; + if (my $loc = $lms ? 
$lms->locations_for($oidhex) : undef) { + $ent->{sync} = $loc; + } + } + $ent; +} + +sub inspect_sync_folder ($$) { + my ($lei, $folder) = @_; + my $ent = {}; + my $lse = $lei->{lse} or return $ent; + my $lms = $lse->lms or return $ent; + my @folders; + if ($folder =~ m!\Aimaps?://!i) { + require PublicInbox::URIimap; + my $uri = PublicInbox::URIimap->new($folder)->canonical; + if (defined($uri->uidvalidity)) { + $folders[0] = $$uri; + } else { + my @maybe = $lms->folders($$uri); + @folders = grep { + my $u = PublicInbox::URIimap->new($_); + $uri->uidvalidity($u->uidvalidity); + $$uri eq $$u; + } @maybe; + } + } elsif ($folder =~ m!\A(maildir|mh):(.+)!i) { + my $type = $1; + $folders[0] = "$type:".$lei->abs_path($2); + } elsif (-d $folder) { + $folders[0] = 'maildir:'.$lei->abs_path($folder); + } else { + $lei->fail("$folder not understood"); + } + $lei->qerr("# no folders match $folder (non-fatal)") if !@folders; + for my $f (@folders) { + $ent->{$f} = $lms->location_stats($f); # may be undef + } + $ent +} + +sub inspect1 ($$$) { + my ($lei, $item, $more) = @_; + my $ent; + if ($item =~ /\Ablob:(.+)/) { + $ent = inspect_blob($lei, $1); + } elsif ($item =~ m!\Aimaps?://!i || + $item =~ m!\A(?:maildir|mh):!i || -d $item) { + $ent = inspect_sync_folder($lei, $item); + } else { # TODO: more things + return $lei->fail("$item not understood"); + } + $lei->out($lei->{json}->encode($ent)); + $lei->out(',') if $more; + 1; +} + +sub lei_inspect { + my ($lei, @argv) = @_; + $lei->{1}->autoflush(0); + my $multi = scalar(@argv) > 1; + $lei->out('[') if $multi; + $lei->{json} = ref(PublicInbox::Config::json())->new->utf8->canonical; + $lei->{lse} = ($lei->{opt}->{external} // 1) ? do { + my $sto = $lei->_lei_store; + $sto ? 
$sto->search : undef; + } : undef; + if ($lei->{opt}->{pretty} || -t $lei->{1}) { + $lei->{json}->pretty(1)->indent(2); + } + while (defined(my $x = shift @argv)) { + inspect1($lei, $x, scalar(@argv)) or return; + } + $lei->out(']') if $multi; +} + +1; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index ff615d89..cd28a700 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -137,4 +137,11 @@ sub qparse_new { $qp } +sub lms { + my ($self) = @_; + require PublicInbox::LeiMailSync; + my $f = "$self->{topdir}/mail_sync.sqlite3"; + -f $f ? PublicInbox::LeiMailSync->new($f) : undef; +} + 1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index f8371abf..1cf7ffc1 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -190,13 +190,28 @@ sub remove_eml_vmd { \@docids; } +sub set_sync_info ($$$) { + my ($self, $oidhex, $sync_info) = @_; + ($self->{lms} //= do { + require PublicInbox::LeiMailSync; + my $f = "$self->{priv_eidx}->{topdir}/mail_sync.sqlite3"; + my $lms = PublicInbox::LeiMailSync->new($f); + $lms->lms_begin; + $lms; + })->set_src($oidhex, @$sync_info); +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->importer; # may create new epoch my ($eidx, $tl) = eidx_init($self); # updates/writes alternates file my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; - $im->add($eml, undef, $smsg) or return; # duplicate returns undef + my $im_mark = $im->add($eml, undef, $smsg); + if ($vmd && $vmd->{sync_info}) { + set_sync_info($self, $smsg->{blob}, $vmd->{sync_info}); + } + $im_mark or return; # duplicate blob returns undef local $self->{current_info} = $smsg->{blob}; my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches @@ -379,6 +394,9 @@ sub done { warn $err; } } + if (my $lms = delete $self->{lms}) { + $lms->lms_commit; + } $self->{priv_eidx}->done; # 
V2Writable::done xchg_stderr($self); die $err if $err; diff --git a/t/lei-import-imap.t b/t/lei-import-imap.t index 490ea9be..4a3bd6d8 100644 --- a/t/lei-import-imap.t +++ b/t/lei-import-imap.t @@ -12,10 +12,28 @@ my $td = start_script($cmd, $env, { 3 => $sock }) or BAIL_OUT("-imapd: $?"); my $host_port = tcp_host_port($sock); undef $sock; test_lei({ tmpdir => $tmpdir }, sub { + my $url = "imap://$host_port/t.v2.0"; + lei_ok(qw(q z:1..)); my $out = json_utf8->decode($lei_out); is_deeply($out, [ undef ], 'nothing imported, yet'); - lei_ok('import', "imap://$host_port/t.v2.0"); + + lei_ok('inspect', $url); + is_deeply(json_utf8->decode($lei_out), {}, 'no inspect stats, yet'); + + lei_ok('import', $url); + + lei_ok('inspect', $url); + my $inspect = json_utf8->decode($lei_out); + my @k = keys %$inspect; + is(scalar(@k), 1, 'one URL resolved'); + like($k[0], qr!\A\Q$url\E;UIDVALIDITY=\d+\z!, 'inspect URL matches'); + my $stats = $inspect->{$k[0]}; + is_deeply([ sort keys %$stats ], + [ qw(uid.count uid.max uid.min) ], 'keys match'); + ok($stats->{'uid.min'} < $stats->{'uid.max'}, 'min < max'); + ok($stats->{'uid.count'} > 0, 'count > 0'); + lei_ok(qw(q z:1..)); $out = json_utf8->decode($lei_out); ok(scalar(@$out) > 1, 'got imported messages'); @@ -23,9 +41,14 @@ test_lei({ tmpdir => $tmpdir }, sub { my %r; for (@$out) { $r{ref($_)}++ } is_deeply(\%r, { 'HASH' => scalar(@$out) }, 'all hashes'); - lei_ok([qw(tag +kw:seen), "imap://$host_port/t.v2.0"], undef, undef); + lei_ok([qw(tag +kw:seen), $url], undef, undef); my $f = "$ENV{HOME}/.local/share/lei/store/net_last.sqlite3"; ok(-s $f, 'net tracked for redundant imports'); + lei_ok('inspect', "blob:$out->[5]->{blob}"); + my $x = json_utf8->decode($lei_out); + is(ref($x->{'lei/store'}), 'ARRAY', 'lei/store in inspect'); + is(ref($x->{sync}), 'HASH', 'sync in inspect'); + is(ref($x->{sync}->{$k[0]}), 'ARRAY', 'UID arrays in inspect'); }); done_testing; diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t index 
6706b014..3e3d9188 100644 --- a/t/lei-import-maildir.t +++ b/t/lei-import-maildir.t @@ -12,6 +12,21 @@ test_lei(sub { BAIL_OUT "symlink $md $!"; lei_ok(qw(import), $md, \'import Maildir'); my $imp_err = $lei_err; + + my %i; + lei_ok('inspect', $md); $i{no_type} = $lei_out; + lei_ok('inspect', "maildir:$md"), $i{with_type} = $lei_out; + lei_ok(['inspect', $md], undef, { -C => $ENV{HOME}, %$lei_opt }); + $i{rel_no_type} = $lei_out; + lei_ok(['inspect', "maildir:$md"], undef, + { -C => $ENV{HOME}, %$lei_opt }); + $i{rel_with_type} = $lei_out; + my %v = map { $_ => 1 } values %i; + is(scalar(keys %v), 1, 'inspect handles relative and absolute paths'); + my $inspect = json_utf8->decode([ keys %v ]->[0]); + is_deeply($inspect, {"maildir:$md" => { 'name.count' => 1 }}, + 'inspect maildir: path had expected output'); + lei_ok(qw(q s:boolean)); my $res = json_utf8->decode($lei_out); like($res->[0]->{'s'}, qr/use boolean/, 'got expected result') @@ -19,6 +34,12 @@ test_lei(sub { is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); is($res->[1], undef, 'only got one result'); + lei_ok('inspect', "blob:$res->[0]->{blob}"); + $inspect = json_utf8->decode($lei_out); + is(ref(delete $inspect->{"lei/store"}), 'ARRAY', 'lei/store IDs'); + is_deeply($inspect, { sync => { "maildir:$md" => [ 'x:2,S' ] } }, + 'maildir sync info as expected'); + lei_ok(qw(import), $md, \'import Maildir again'); $imp_err = $lei_err; lei_ok(qw(q -d none s:boolean), \'lei q w/o dedupe');