From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3BBCF1FA13 for ; Sat, 24 Apr 2021 09:28:47 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 6/7] lei_mail_sync: for bidirectional keyword sync Date: Sat, 24 Apr 2021 09:28:45 +0000 Message-Id: <20210424092846.726-7-e@80x24.org> In-Reply-To: <20210424092846.726-1-e@80x24.org> References: <20210424092846.726-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll be using the new class to efficiently propagate keyword changes from lei/store back to Maildir or IMAP folders. --- MANIFEST | 2 + lib/PublicInbox/LeiMailSync.pm | 211 +++++++++++++++++++++++++++++++++ t/lei_mail_sync.t | 68 +++++++++++ 3 files changed, 281 insertions(+) create mode 100644 lib/PublicInbox/LeiMailSync.pm create mode 100644 t/lei_mail_sync.t diff --git a/MANIFEST b/MANIFEST index e0f9c35b..abaf54b0 100644 --- a/MANIFEST +++ b/MANIFEST @@ -201,6 +201,7 @@ lib/PublicInbox/LeiInit.pm lib/PublicInbox/LeiInput.pm lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiLsSearch.pm +lib/PublicInbox/LeiMailSync.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm @@ -407,6 +408,7 @@ t/lei-tag.t t/lei.t t/lei_dedupe.t t/lei_external.t +t/lei_mail_sync.t t/lei_overview.t t/lei_saved_search.t t/lei_store.t diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm new file mode 100644 index 00000000..52f26d69 --- /dev/null +++ b/lib/PublicInbox/LeiMailSync.pm @@ -0,0 +1,211 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP +package PublicInbox::LeiMailSync; +use strict; +use v5.10.1; +use DBI; + +sub dbh_new { + my ($self, $rw) = @_; + my $f = $self->{filename}; + my $creat; + if (!-f $f && $rw) { + require PublicInbox::Spawn; + open my $fh, '+>>', $f or die "failed to open $f: $!"; + PublicInbox::Spawn::nodatacow_fd(fileno($fh)); + $creat = 1; + } + my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', { + AutoCommit => 1, + RaiseError => 1, + PrintError => 0, + ReadOnly => !$rw, + sqlite_use_immediate_transaction => 1, + }); + # no sqlite_unicode, here, all strings are binary + create_tables($dbh) if $rw; + $dbh->do('PRAGMA journal_mode = WAL') if $creat; + $dbh->do('PRAGMA case_sensitive_like = ON'); + $dbh; +} + +sub new { + my ($cls, $f) = @_; + bless { filename => $f, fmap => {} }, $cls; +} + +sub lms_commit { delete($_[0]->{dbh})->commit } + +sub lms_begin { ($_[0]->{dbh} //= dbh_new($_[0], 1))->begin_work }; + +sub create_tables { + my ($dbh) = @_; + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS folders ( + fid INTEGER PRIMARY KEY, + loc VARBINARY NOT NULL, /* URL;UIDVALIDITY=$N or $TYPE:/pathname */ + UNIQUE (loc) +) + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS blob2num ( + oidbin VARBINARY NOT NULL, + fid INTEGER NOT NULL, /* folder ID */ + uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */ + UNIQUE (oidbin, fid, uid) +) + + $dbh->do(<<''); +CREATE TABLE IF NOT EXISTS blob2name ( + oidbin VARBINARY NOT NULL, + fid INTEGER NOT NULL, /* folder ID */ + name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */ + UNIQUE (oidbin, fid, name) +) + +} + +sub _fid_for { + my ($self, $folder, $rw) = @_; + my $dbh = $self->{dbh}; + my ($row) = $dbh->selectrow_array(<<'', undef, $folder); +SELECT fid FROM folders WHERE loc = ? LIMIT 1 + + return $row if defined $row; + return unless $rw; + + ($row) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders'); + + my $fid = ($row // 0) + 1; + # in case we're reusing, clobber existing stale refs: + $dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid); + $dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid); + + my $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)'); + $sth->execute($fid, $folder); + + $fid; +} + +sub set_src { + my ($self, $oidhex, $folder, $id) = @_; + my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1); + my $sth; + if (ref($id)) { # scalar name + $id = $$id; + $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?) + + } else { # numeric ID (IMAP UID, MH number) + $sth = $self->{dbh}->prepare_cached(<<''); +INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?) + + } + $sth->execute(pack('H*', $oidhex), $fid, $id); +} + +sub clear_src { + my ($self, $folder, $id) = @_; + my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1); + my $sth; + if (ref($id)) { # scalar name + $id = $$id; + $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM blob2name WHERE fid = ? AND name = ? + + } else { + $sth = $self->{dbh}->prepare_cached(<<''); +DELETE FROM blob2num WHERE fid = ? AND uid = ? + + } + $sth->execute($fid, $id); +} + +# read-only, iterates every oidbin + UID or name for a given folder +sub each_src { + my ($self, $folder, $cb, @args) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my ($fid, $sth); + $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return; + $sth = $dbh->prepare('SELECT oidbin,uid FROM blob2num WHERE fid = ?'); + $sth->execute($fid); + while (my ($oidbin, $id) = $sth->fetchrow_array) { + $cb->($oidbin, $id, @args); + } + $sth = $dbh->prepare('SELECT oidbin,name FROM blob2name WHERE fid = ?'); + $sth->execute($fid); + while (my ($oidbin, $id) = $sth->fetchrow_array) { + $cb->($oidbin, \$id, @args); + } +} + +sub location_stats { + my ($self, $folder, $cb, @args) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my $fid; + my $ret = {}; + $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder) // return; + my ($row) = $dbh->selectrow_array(<<"", undef, $fid); +SELECT COUNT(name) FROM blob2name WHERE fid = ? + + $ret->{'name.count'} = $row if $row; + for my $op (qw(count min max)) { + ($row) = $dbh->selectrow_array(<<"", undef, $fid); +SELECT $op(uid) FROM blob2num WHERE fid = ? + + $row or last; + $ret->{"uid.$op"} = $row; + } + $ret; +} + +# returns a { location => [ list-of-ids-or-names ] } mapping +sub locations_for { + my ($self, $oidhex) = @_; + my ($fid, $sth, $id, %fid2id); + my $dbh = $self->{dbh} //= dbh_new($self); + $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?'); + $sth->execute(pack('H*', $oidhex)); + while (my ($fid, $uid) = $sth->fetchrow_array) { + push @{$fid2id{$fid}}, $uid; + } + $sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?'); + $sth->execute(pack('H*', $oidhex)); + while (my ($fid, $name) = $sth->fetchrow_array) { + push @{$fid2id{$fid}}, $name; + } + $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1'); + my $ret = {}; + while (my ($fid, $ids) = each %fid2id) { + $sth->execute($fid); + my ($loc) = $sth->fetchrow_array; + unless (defined $loc) { + warn "E: fid=$fid for $oidhex unknown:\n", map { + 'E: '.(ref() ? $$_ : "#$_")."\n"; + } @$ids; + next; + } + $ret->{$loc} = $ids; + } + scalar(keys %$ret) ? $ret : undef; +} + +# returns a list of folders used for completion +sub folders { + my ($self, $pfx) = @_; + my $dbh = $self->{dbh} //= dbh_new($self); + my $sql = 'SELECT loc FROM folders'; + my @pfx; + if (defined $pfx) { + $sql .= ' WHERE loc LIKE ? ESCAPE ?'; + @pfx = ($pfx, '\\'); + $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars + $pfx[0] .= '%'; + } + map { $_->[0] } @{$dbh->selectall_arrayref($sql, undef, @pfx)}; +} + +1; diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t new file mode 100644 index 00000000..864d6e48 --- /dev/null +++ b/t/lei_mail_sync.t @@ -0,0 +1,68 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use PublicInbox::TestCommon; +require_mods(qw(DBD::SQLite)); +require_ok 'PublicInbox::LeiMailSync'; +my ($dir, $for_destroy) = tmpdir(); +my $lms = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); + +$lms->lms_begin; +$lms->lms_commit; +my $ro = PublicInbox::LeiMailSync->new("$dir/t.sqlite3"); +is_deeply([$ro->folders], [], 'no folders, yet'); + +my $imap = 'imaps://bob@[::1]/INBOX;UIDVALIDITY=9'; +$lms->lms_begin; +is($lms->set_src('deadbeef', $imap, 1), 1, 'set IMAP once'); +ok($lms->set_src('deadbeef', $imap, 1) == 0, 'set IMAP idempotently'); +$lms->lms_commit; +is_deeply([$ro->folders], [$imap], 'IMAP folder added'); +is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB'); +is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap], + 'IMAP folder with partial GLOB'); + +is_deeply($ro->locations_for('deadbeef'), + { $imap => [ 1 ] }, 'locations_for w/ imap'); + +my $maildir = 'maildir:/home/user/md'; +my $fname = 'foo:2,S'; +$lms->lms_begin; +ok($lms->set_src('deadbeef', $maildir, \$fname), 'set Maildir once'); +ok($lms->set_src('deadbeef', $maildir, \$fname) == 0, 'set Maildir again'); +$lms->lms_commit; +is_deeply($ro->locations_for('deadbeef'), + { $imap => [ 1 ], $maildir => [ $fname ] }, + 'locations_for w/ maildir + imap'); + +is_deeply([sort($ro->folders)], [$imap, $maildir], 'both folders shown'); +my @res; +$ro->each_src($maildir, sub { + my ($oidbin, $id) = @_; + push @res, [ unpack('H*', $oidbin), $id ]; +}); +is_deeply(\@res, [ ['deadbeef', \$fname] ], 'each_src works on Maildir'); + +@res = (); +$ro->each_src($imap, sub { + my ($oidbin, $id) = @_; + push @res, [ unpack('H*', $oidbin), $id ]; +}); +is_deeply(\@res, [ ['deadbeef', 1] ], 'each_src works on IMAP'); + +is_deeply($ro->location_stats($maildir), { 'name.count' => 1 }, + 'Maildir location stats'); +is_deeply($ro->location_stats($imap), + { 'uid.count' => 1, 'uid.max' => 1, 'uid.min' => 1 }, + 'IMAP location stats'); +$lms->lms_begin; +is($lms->clear_src($imap, 1), 1, 'clear_src on IMAP'); +is($lms->clear_src($maildir, \$fname), 1, 'clear_src on Maildir'); +ok($lms->clear_src($imap, 1) == 0, 'clear_src again on IMAP'); +ok($lms->clear_src($maildir, \$fname) == 0, 'clear_src again on Maildir'); +$lms->lms_commit; +is_deeply($ro->location_stats($maildir), {}, 'nothing left'); + +done_testing;