unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 6/7] lei_mail_sync: for bidirectional keyword sync
Date: Sat, 24 Apr 2021 09:28:45 +0000	[thread overview]
Message-ID: <20210424092846.726-7-e@80x24.org> (raw)
In-Reply-To: <20210424092846.726-1-e@80x24.org>

We'll be using the new class to efficiently propagate keyword
changes from lei/store back to Maildir or IMAP folders.
---
 MANIFEST                       |   2 +
 lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++++++
 t/lei_mail_sync.t              |  68 +++++++++++
 3 files changed, 281 insertions(+)
 create mode 100644 lib/PublicInbox/LeiMailSync.pm
 create mode 100644 t/lei_mail_sync.t

diff --git a/MANIFEST b/MANIFEST
index e0f9c35b..abaf54b0 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -201,6 +201,7 @@ lib/PublicInbox/LeiInit.pm
 lib/PublicInbox/LeiInput.pm
 lib/PublicInbox/LeiLsLabel.pm
 lib/PublicInbox/LeiLsSearch.pm
+lib/PublicInbox/LeiMailSync.pm
 lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
@@ -407,6 +408,7 @@ t/lei-tag.t
 t/lei.t
 t/lei_dedupe.t
 t/lei_external.t
+t/lei_mail_sync.t
 t/lei_overview.t
 t/lei_saved_search.t
 t/lei_store.t
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
new file mode 100644
index 00000000..52f26d69
--- /dev/null
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -0,0 +1,211 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
+package PublicInbox::LeiMailSync;
+use strict;
+use v5.10.1;
+use DBI;
+
+sub dbh_new { # returns a new DBI handle on $self->{filename}
+	my ($self, $rw) = @_; # $rw: true for read-write, false for read-only
+	my $path = $self->{filename};
+	my $is_new;
+	unless (-f $path || !$rw) { # read-write, and no regular file there, yet
+		require PublicInbox::Spawn;
+		open(my $fh, '+>>', $path) or die "failed to open $path: $!";
+		PublicInbox::Spawn::nodatacow_fd(fileno($fh));
+		$is_new = 1;
+	}
+	my %attr = ( # no sqlite_unicode, here, all strings are binary
+		AutoCommit => 1,
+		RaiseError => 1,
+		PrintError => 0,
+		ReadOnly => !$rw,
+		sqlite_use_immediate_transaction => 1,
+	);
+	my $dbh = DBI->connect("dbi:SQLite:dbname=$path", '', '', \%attr);
+	create_tables($dbh) if $rw; # CREATE TABLE IF NOT EXISTS, safe to repeat
+	$dbh->do('PRAGMA journal_mode = WAL') if $is_new; # only when just created
+	$dbh->do('PRAGMA case_sensitive_like = ON');
+	$dbh;
+}
+
+sub new { # only records the DB path, no handle is opened here
+	my ($class, $filename) = @_;
+	bless { filename => $filename, fmap => {} }, $class; # fmap: folder => fid
+}
+
+# begin: transaction on {dbh} (opened read-write if unset); commit: drops {dbh}
+sub lms_commit { my ($self) = @_; delete($self->{dbh})->commit }
+sub lms_begin { my ($self) = @_; ($self->{dbh} //= dbh_new($self, 1))->begin_work }
+
+sub create_tables { # idempotent: every statement is CREATE TABLE IF NOT EXISTS
+	my ($dbh) = @_;
+
+	$dbh->do(<<''); # folders: one unique location per integer fid
+CREATE TABLE IF NOT EXISTS folders (
+	fid INTEGER PRIMARY KEY,
+	loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */
+	UNIQUE (loc)
+)
+
+	$dbh->do(<<''); # blob2num: unique (blob OID, folder, numeric ID) triples
+CREATE TABLE IF NOT EXISTS blob2num (
+	oidbin VARBINARY NOT NULL,
+	fid INTEGER NOT NULL, /* folder ID */
+	uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
+	UNIQUE (oidbin, fid, uid)
+)
+
+	$dbh->do(<<''); # blob2name: unique (blob OID, folder, name) triples
+CREATE TABLE IF NOT EXISTS blob2name (
+	oidbin VARBINARY NOT NULL,
+	fid INTEGER NOT NULL, /* folder ID */
+	name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
+	UNIQUE (oidbin, fid, name)
+)
+
+}
+
+sub _fid_for { # maps a folder location to its integer fid
+	my ($self, $folder, $rw) = @_; # $rw: true to allocate a fid when missing
+	my $dbh = $self->{dbh};
+	my ($fid) = $dbh->selectrow_array(<<'', undef, $folder);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+	defined($fid) and return $fid;
+	$rw or return; # unknown folder and not allowed to create one
+
+	my ($max) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');
+	$fid = ($max // 0) + 1; # an empty table yields fid=1
+
+	# in case we're reusing, clobber existing stale refs:
+	for my $tbl (qw(blob2name blob2num)) {
+		$dbh->do("DELETE FROM $tbl WHERE fid = ?", undef, $fid);
+	}
+	$dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)')
+		->execute($fid, $folder);
+
+	$fid;
+}
+
+sub set_src { # records that blob $oidhex is stored as $id in $folder
+	# returns the execute() result; t/lei_mail_sync.t expects 1, then 0 on repeat
+	my ($self, $oidhex, $folder, $id) = @_;
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1);
+	my $dbh = $self->{dbh};
+	my $oidbin = pack('H*', $oidhex);
+	unless (ref($id)) { # numeric ID (IMAP UID, MH number)
+		return $dbh->prepare_cached(<<'')->execute($oidbin, $fid, $id);
+INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
+
+	}
+	# scalar ref: $$id is a name
+	$dbh->prepare_cached(<<'')->execute($oidbin, $fid, $$id);
+INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
+
+}
+
+sub clear_src { # forgets $id inside $folder, whichever blob it pointed to
+	# returns the execute() result; t/lei_mail_sync.t expects 1, then 0 on repeat
+	my ($self, $folder, $id) = @_;
+	# NB: rw=1, so an unknown $folder gets a fid allocated here, too
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1);
+	my $dbh = $self->{dbh};
+	unless (ref($id)) { # numeric ID
+		return $dbh->prepare_cached(<<'')->execute($fid, $id);
+DELETE FROM blob2num WHERE fid = ? AND uid = ?
+
+	}
+	# scalar ref: $$id is a name
+	$dbh->prepare_cached(<<'')->execute($fid, $$id);
+DELETE FROM blob2name WHERE fid = ? AND name = ?
+
+}
+
+# read-only: invokes $cb->($oidbin, $uid_or_name_ref, @args) for every row
+sub each_src { # of $folder; returns early if $folder has no fid
+	my ($self, $folder, $cb, @args) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return;
+	my $nums = $dbh->prepare('SELECT oidbin,uid FROM blob2num WHERE fid = ?');
+	$nums->execute($fid);
+	while (my @row = $nums->fetchrow_array) { # (oidbin, uid)
+		$cb->(@row, @args);
+	}
+	my $names = $dbh->prepare('SELECT oidbin,name FROM blob2name WHERE fid = ?');
+	$names->execute($fid);
+	# names go out as scalar refs, the same convention set_src accepts
+	while (my ($oidbin, $name) = $names->fetchrow_array) {
+		$cb->($oidbin, \$name, @args);
+	}
+}
+
+sub location_stats { # returns a hashref of row statistics for $folder
+	my ($self, $folder, $cb, @args) = @_; # $cb and @args are not used here
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return;
+	my %stats; # only true (non-zero) values get a key
+	my ($names) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT COUNT(name) FROM blob2name WHERE fid = ?
+
+	$stats{'name.count'} = $names if $names;
+	# fills in uid.count, uid.min and uid.max, in that order
+	for my $agg (qw(count min max)) {
+		my ($val) = $dbh->selectrow_array(<<"", undef, $fid);
+SELECT $agg(uid) FROM blob2num WHERE fid = ?
+
+		$val or last; # a false value ends the loop, later keys stay unset
+		$stats{"uid.$agg"} = $val;
+	}
+	\%stats;
+}
+
+# returns a { location => [ list-of-ids-or-names ] } mapping, undef if empty
+sub locations_for {
+	my ($self, $oidhex) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my $oidbin = pack('H*', $oidhex); # both tables store the binary OID
+	my %fid2id; # fid => [ UIDs first, then names ], all plain scalars
+	my $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
+	$sth->execute($oidbin);
+	while (my ($fid, $uid) = $sth->fetchrow_array) {
+		push @{$fid2id{$fid}}, $uid;
+	}
+	$sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
+	$sth->execute($oidbin);
+	while (my ($fid, $name) = $sth->fetchrow_array) {
+		push @{$fid2id{$fid}}, $name;
+	}
+	$sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
+	my $ret = {};
+	while (my ($fid, $ids) = each %fid2id) {
+		$sth->execute($fid);
+		my ($loc) = $sth->fetchrow_array;
+		unless (defined $loc) { # rows exist for a fid missing from folders
+			warn "E: fid=$fid for $oidhex unknown:\n",
+				map { "E: #$_\n" } @$ids; # ids are never refs here
+			next;
+		}
+		$ret->{$loc} = $ids;
+	}
+	scalar(keys %$ret) ? $ret : undef;
+}
+
+# returns a list of folders used for completion
+sub folders { # $pfx (optional): only locations starting with it are returned
+	my ($self, $pfx) = @_;
+	my $dbh = $self->{dbh} //= dbh_new($self);
+	my ($sql, @bind) = ('SELECT loc FROM folders');
+	if (defined $pfx) {
+		# LIKE is case-sensitive: dbh_new sets case_sensitive_like = ON
+		(my $like = $pfx) =~ s/([%_\\])/\\$1/g; # glob chars
+		$sql .= ' WHERE loc LIKE ? ESCAPE ?';
+		@bind = ($like.'%', '\\');
+	}
+	my $rows = $dbh->selectall_arrayref($sql, undef, @bind);
+	map { $_->[0] } @$rows;
+}
+
+1;
diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t
new file mode 100644
index 00000000..864d6e48
--- /dev/null
+++ b/t/lei_mail_sync.t
@@ -0,0 +1,68 @@
+#!perl -w
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use v5.10.1;
+use PublicInbox::TestCommon;
+require_mods(qw(DBD::SQLite));
+require_ok 'PublicInbox::LeiMailSync';
+my ($dir, $for_destroy) = tmpdir();
+my $lms = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); # used for writes
+
+$lms->lms_begin; # first read-write open creates the file and the tables
+$lms->lms_commit;
+my $ro = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); # used for reads
+is_deeply([$ro->folders], [], 'no folders, yet');
+
+my $imap = 'imaps://bob@[::1]/INBOX;UIDVALIDITY=9';
+$lms->lms_begin;
+is($lms->set_src('deadbeef', $imap, 1), 1, 'set IMAP once'); # plain scalar: UID
+ok($lms->set_src('deadbeef', $imap, 1) == 0, 'set IMAP idempotently');
+$lms->lms_commit;
+is_deeply([$ro->folders], [$imap], 'IMAP folder added');
+is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB');
+is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap],
+		'IMAP folder with partial GLOB');
+
+is_deeply($ro->locations_for('deadbeef'),
+	{ $imap => [ 1 ] }, 'locations_for w/ imap');
+
+my $maildir = 'maildir:/home/user/md';
+my $fname = 'foo:2,S';
+$lms->lms_begin;
+ok($lms->set_src('deadbeef', $maildir, \$fname), 'set Maildir once'); # ref: name
+ok($lms->set_src('deadbeef', $maildir, \$fname) == 0, 'set Maildir again');
+$lms->lms_commit;
+is_deeply($ro->locations_for('deadbeef'),
+	{ $imap => [ 1 ], $maildir => [ $fname ] },
+	'locations_for w/ maildir + imap');
+
+is_deeply([sort($ro->folders)], [$imap, $maildir], 'both folders shown');
+my @res;
+$ro->each_src($maildir, sub { # names arrive as scalar refs
+	my ($oidbin, $id) = @_;
+	push @res, [ unpack('H*', $oidbin), $id ];
+});
+is_deeply(\@res, [ ['deadbeef', \$fname] ], 'each_src works on Maildir');
+
+@res = ();
+$ro->each_src($imap, sub { # UIDs arrive as plain scalars
+	my ($oidbin, $id) = @_;
+	push @res, [ unpack('H*', $oidbin), $id ];
+});
+is_deeply(\@res, [ ['deadbeef', 1] ], 'each_src works on IMAP');
+
+is_deeply($ro->location_stats($maildir), { 'name.count' => 1 },
+	'Maildir location stats');
+is_deeply($ro->location_stats($imap),
+	{ 'uid.count' => 1, 'uid.max' => 1, 'uid.min' => 1 },
+	'IMAP location stats');
+$lms->lms_begin;
+is($lms->clear_src($imap, 1), 1, 'clear_src on IMAP');
+is($lms->clear_src($maildir, \$fname), 1, 'clear_src on Maildir');
+ok($lms->clear_src($imap, 1) == 0, 'clear_src again on IMAP');
+ok($lms->clear_src($maildir, \$fname) == 0, 'clear_src again on Maildir');
+$lms->lms_commit;
+is_deeply($ro->location_stats($maildir), {}, 'nothing left'); # zero counts omitted
+
+done_testing;

  parent reply	other threads:[~2021-04-24  9:28 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-24  9:28 [PATCH 0/7] lei sync preparations, "lei inspect" Eric Wong
2021-04-24  9:28 ` [PATCH 1/7] lei_input: drop outdated comment w.r.t. compression Eric Wong
2021-04-24  9:28 ` [PATCH 2/7] t/lei_to_mail: split "lei import" test $HOME directory Eric Wong
2021-04-24  9:28 ` [PATCH 3/7] URIimap: support ->uidvalidity and ->iuid Eric Wong
2021-04-24  9:28 ` [PATCH 4/7] net_reader: imap_each: add UIDVALIDITY to URL arg Eric Wong
2021-04-24  9:28 ` [PATCH 5/7] doc: lei_design_notes: add a bit on WAL usage Eric Wong
2021-04-24  9:28 ` Eric Wong [this message]
2021-04-24  9:28 ` [PATCH 7/7] lei import: keep sync info for Maildir and IMAP folders Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210424092846.726-7-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).