unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] lei_mail_sync ambiguity fixes
@ 2022-04-02  1:13 Eric Wong
  2022-04-02  1:13 ` [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary Eric Wong
  2022-04-02  1:13 ` [PATCH 2/2] lei_mail_sync: store OIDs and Maildir filenames as blobs Eric Wong
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Wong @ 2022-04-02  1:13 UTC (permalink / raw)
  To: meta

Ugh, these took a while :x  But I think it's important for interop, etc.

Eric Wong (2):
  lei_mail_sync: ensure URLs and folder names are stored as binary
  lei_mail_sync: store OIDs and Maildir filenames as blobs

 lib/PublicInbox/LeiMailSync.pm | 170 +++++++++++++++++++++++++--------
 t/lei_mail_sync.t              |   5 +-
 2 files changed, 136 insertions(+), 39 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary
  2022-04-02  1:13 [PATCH 0/2] lei_mail_sync ambiguity fixes Eric Wong
@ 2022-04-02  1:13 ` Eric Wong
  2022-04-02 23:45   ` Eric Wong
  2022-04-02  1:13 ` [PATCH 2/2] lei_mail_sync: store OIDs and Maildir filenames as blobs Eric Wong
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Wong @ 2022-04-02  1:13 UTC (permalink / raw)
  To: meta

Apparently leaving {sqlite_unicode} unset isn't enough, and
there's subtle differences where BLOBs are stored differently
than TEXT when dealing with binary data.  We also want to avoid
odd cases where SQLite will attempt to treat a number-like value
as an integer.

This should avoid problems in case non-UTF-8 URLs and pathnames are
used.  They'll automatically be upgraded if not, but downgrades
to older lei would cause duplicates to appear.
---
 lib/PublicInbox/LeiMailSync.pm | 75 +++++++++++++++++++++++-----------
 t/lei_mail_sync.t              |  5 ++-
 2 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 182b0c22..d93a5810 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -6,7 +6,7 @@ package PublicInbox::LeiMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
-use DBI;
+use DBI qw(:sql_types); # SQL_BLOB
 use PublicInbox::ContentHash qw(git_sha);
 use Carp ();
 
@@ -90,29 +90,55 @@ CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
 
 }
 
+# used to fixup pre-1.7.0 folders
+sub update_fid ($$$) {
+	my ($dbh, $fid, $loc) = @_;
+	my $sth = $dbh->prepare(<<'');
+UPDATE folders SET loc = ? WHERE fid = ?
+
+	$sth->bind_param(1, $loc, SQL_BLOB);
+	$sth->bind_param(2, $fid);
+	$sth->execute;
+}
+
+sub get_fid ($$$) {
+	my ($sth, $folder, $dbh) = @_; # $dbh is set iff RW
+	$sth->bind_param(1, $folder, SQL_BLOB);
+	$sth->execute;
+	my ($fid) = $sth->fetchrow_array;
+	if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
+		$dbh->do('DELETE FROM folders WHERE loc = ? AND fid != ?',
+			undef, $folder, $fid) if defined($dbh);
+	} else {
+		$sth->execute($folder); # fixup old stuff
+		($fid) = $sth->fetchrow_array;
+		update_fid($dbh, $fid, $folder) if defined($fid) && $dbh;
+	}
+	$fid;
+}
+
 sub fid_for {
 	my ($self, $folder, $rw) = @_;
 	my $dbh = $self->{dbh} //= dbh_new($self, $rw);
-	my $sel = 'SELECT fid FROM folders WHERE loc = ? LIMIT 1';
-	my ($fid) = $dbh->selectrow_array($sel, undef, $folder);
-	return $fid if defined $fid;
+	my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+	my $rw_dbh = $rw ? $dbh : undef;
+	my $fid = get_fid($sth, $folder, $rw_dbh);
+	return $fid if defined($fid);
 
 	# caller had trailing slash (LeiToMail)
 	if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
-		($fid) = $dbh->selectrow_array($sel, undef, $folder);
+		$fid = get_fid($sth, $folder, $rw_dbh);
 		if (defined $fid) {
-			$dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+			update_fid($dbh, $fid, $folder) if $rw;
 			return $fid;
 		}
 	# sometimes we stored trailing slash..
 	} elsif ($folder =~ m!\A(?:maildir|mh):!i) {
-		($fid) = $dbh->selectrow_array($sel, undef, "$folder/");
+		$fid = get_fid($sth, $folder, $rw_dbh);
 		if (defined $fid) {
-			$dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+			update_fid($dbh, $fid, $folder) if $rw;
 			return $fid;
 		}
 	} elsif ($rw && $folder =~ m!\Aimaps?://!i) {
@@ -129,8 +155,10 @@ EOM
 	$dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
 	$dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);
 
-	my $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
-	$sth->execute($fid, $folder);
+	$sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
+	$sth->bind_param(1, $fid);
+	$sth->bind_param(2, $folder, SQL_BLOB);
+	$sth->execute;
 
 	$fid;
 }
@@ -306,18 +334,17 @@ sub locations_for {
 sub folders {
 	my ($self, @pfx) = @_;
 	my $sql = 'SELECT loc FROM folders';
+	my $re;
 	if (defined($pfx[0])) {
-		$sql .= ' WHERE loc LIKE ? ESCAPE ?';
-		my $anywhere = !!$pfx[1];
-		$pfx[1] = '\\';
-		$pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
-		$pfx[0] .= '%';
-		substr($pfx[0], 0, 0, '%') if $anywhere;
-	} else {
-		@pfx = (); # [0] may've been undef
+		$sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
+		$re = !!$pfx[1] ? '.*' : '';
+		$re .= quotemeta($pfx[0]);
+		$re .= '.*';
 	}
-	my $dbh = $self->{dbh} //= dbh_new($self);
-	map { $_->[0] } @{$dbh->selectall_arrayref($sql, undef, @pfx)};
+	my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
+	$sth->bind_param(1, $re) if defined($re);
+	$sth->execute;
+	map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
 sub local_blob {
diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t
index 4439b818..74a6c8aa 100644
--- a/t/lei_mail_sync.t
+++ b/t/lei_mail_sync.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -19,6 +19,8 @@ my $deadbeef = "\xde\xad\xbe\xef";
 is($lms->set_src($deadbeef, $imap, 1), 1, 'set IMAP once');
 ok($lms->set_src($deadbeef, $imap, 1) == 0, 'set IMAP idempotently');
 is_deeply([$ro->folders], [$imap], 'IMAP folder added');
+note explain([$ro->folders($imap)]);
+note explain([$imap, [$ro->folders]]);
 is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB');
 is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap],
 		'IMAP folder with partial GLOB');
@@ -37,6 +39,7 @@ is_deeply($ro->locations_for($deadbeef),
 
 if ('mess things up pretend old bug') {
 	$lms->lms_write_prepare;
+	diag "messing things up";
 	$lms->{dbh}->do('UPDATE folders SET loc = ? WHERE loc = ?', undef,
 			"$maildir/", $maildir);
 	ok(delete $lms->{fmap}, 'clear folder map');

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] lei_mail_sync: store OIDs and Maildir filenames as blobs
  2022-04-02  1:13 [PATCH 0/2] lei_mail_sync ambiguity fixes Eric Wong
  2022-04-02  1:13 ` [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary Eric Wong
@ 2022-04-02  1:13 ` Eric Wong
  1 sibling, 0 replies; 4+ messages in thread
From: Eric Wong @ 2022-04-02  1:13 UTC (permalink / raw)
  To: meta

DBD::SQLite doesn't seem to use SQL_BLOB automatically, which
can lead to ambiguity in some cases (especially interoperating
with other tools).

Downgrading to lei 1.7.0 will cause problems, but upgrading
appears transparent after weeks of tests.
---
 lib/PublicInbox/LeiMailSync.pm | 97 ++++++++++++++++++++++++++++------
 1 file changed, 82 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index d93a5810..a9a65fd6 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -123,7 +123,7 @@ sub fid_for {
 	my $sth = $dbh->prepare_cached(<<'', undef, 1);
 SELECT fid FROM folders WHERE loc = ? LIMIT 1
 
-	my $rw_dbh = $rw ? $dbh : undef;
+	my $rw_dbh = $dbh->{ReadOnly} ? undef : $dbh;
 	my $fid = get_fid($sth, $folder, $rw_dbh);
 	return $fid if defined($fid);
 
@@ -173,36 +173,60 @@ sub set_src {
 	my ($self, $oidbin, $folder, $id) = @_;
 	my $lk = $self->lock_for_scope;
 	my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
-	my $sth;
+	my $dbh = $self->{dbh};
+	my ($sth, @param3, $del_old);
 	if (ref($id)) { # scalar name
-		$id = $$id;
-		$sth = $self->{dbh}->prepare_cached(<<'');
+		@param3 = ($$id, SQL_BLOB);
+		$sth = $dbh->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 
+		$del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2name WHERE oidbin = ? AND fid = ? AND name = ?
+
 	} else { # numeric ID (IMAP UID, MH number)
-		$sth = $self->{dbh}->prepare_cached(<<'');
+		@param3 = ($id);
+		$sth = $dbh->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
 
+		$del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2num WHERE oidbin = ? AND fid = ? AND uid = ?
+
 	}
-	$sth->execute($oidbin, $fid, $id);
+	$sth->bind_param(1, $oidbin, SQL_BLOB);
+	$sth->bind_param(2, $fid);
+	$sth->bind_param(3, @param3);
+	my $ret = $sth->execute;
+	$del_old->execute($oidbin, $fid, $param3[0]);
+	$ret;
 }
 
 sub clear_src {
 	my ($self, $folder, $id) = @_;
 	my $lk = $self->lock_for_scope;
 	my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
-	my $sth;
+	my ($sth, @param3);
 	if (ref($id)) { # scalar name
-		$id = $$id;
+		@param3 = ($$id, SQL_BLOB);
 		$sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM blob2name WHERE fid = ? AND name = ?
 
 	} else {
+		@param3 = ($id);
 		$sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM blob2num WHERE fid = ? AND uid = ?
 
 	}
-	$sth->execute($fid, $id);
+	$sth->bind_param(1, $fid);
+	$sth->bind_param(2, @param3);
+	my $ret = $sth->execute;
+
+	# older versions may not have used SQL_BLOB:
+	if (defined($ret) && $ret == 0 && scalar(@param3) == 2) {
+		$sth->bind_param(1, $fid);
+		$sth->bind_param(2, $param3[0]);
+		$ret = $sth->execute;
+	}
+	$ret;
 }
 
 # Maildir-only
@@ -215,13 +239,27 @@ sub mv_src {
 UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?
 
 	# eval since unique constraint may fail due to race
-	my $nr = eval { $sth->execute($newbn, $fid, $oidbin, $$id) };
+	$sth->bind_param(1, $newbn, SQL_BLOB);
+	$sth->bind_param(2, $fid);
+	$sth->bind_param(3, $oidbin, SQL_BLOB);
+	$sth->bind_param(4, $$id, SQL_BLOB);
+	my $nr = eval { $sth->execute };
 	if (!defined($nr) || $nr == 0) { # $nr may be `0E0'
+		# delete from old, pre-SQL_BLOB rows:
+		my $del_old = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM blob2name WHERE fid = ? AND oidbin = ? AND name = ?
+
+		$del_old->execute($fid, $oidbin, $$id); # missing-OK
+		$del_old->execute($fid, $oidbin, $newbn); # ditto
+
 		# may race with a clear_src, ensure new value exists
 		$sth = $self->{dbh}->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 
-		$sth->execute($oidbin, $fid, $newbn);
+		$sth->bind_param(1, $oidbin, SQL_BLOB);
+		$sth->bind_param(2, $fid);
+		$sth->bind_param(3, $newbn, SQL_BLOB);
+		$sth->execute;
 	}
 	$self->{dbh}->commit;
 }
@@ -301,18 +339,39 @@ SELECT $op(uid) FROM blob2num WHERE fid = ?
 # returns a { location => [ list-of-ids-or-names ] } mapping
 sub locations_for {
 	my ($self, $oidbin) = @_;
-	my ($fid, $sth, $id, %fid2id);
+	my ($fid, $sth, $id, %fid2id, %seen);
 	my $dbh = $self->{dbh} //= dbh_new($self);
 	$sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
+	$sth->bind_param(1, $oidbin, SQL_BLOB);
+	$sth->execute;
+	while (my ($fid, $uid) = $sth->fetchrow_array) {
+		push @{$fid2id{$fid}}, $uid;
+		$seen{"$uid.$fid"} = 1;
+	}
+
+	# deal with 1.7.0 DBs :<
 	$sth->execute($oidbin);
 	while (my ($fid, $uid) = $sth->fetchrow_array) {
+		next if $seen{"$uid.$fid"};
 		push @{$fid2id{$fid}}, $uid;
 	}
+
+	%seen = ();
 	$sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
+	$sth->bind_param(1, $oidbin, SQL_BLOB);
+	$sth->execute;
+	while (my ($fid, $name) = $sth->fetchrow_array) {
+		push @{$fid2id{$fid}}, $name;
+		$seen{"$fid.$name"} = 1;
+	}
+
+	# deal with 1.7.0 DBs :<
 	$sth->execute($oidbin);
 	while (my ($fid, $name) = $sth->fetchrow_array) {
+		next if $seen{"$fid.$name"};
 		push @{$fid2id{$fid}}, $name;
 	}
+
 	$sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
 	my $ret = {};
 	while (my ($fid, $ids) = each %fid2id) {
@@ -355,7 +414,8 @@ SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-	$b2n->execute(pack('H*', $oidhex));
+	$b2n->bind_param(1, pack('H*', $oidhex), SQL_BLOB);
+	$b2n->execute;
 	while (my ($d, $n) = $b2n->fetchrow_array) {
 		substr($d, 0, length('maildir:')) = '';
 		# n.b. both mbsync and offlineimap use ":2," as a suffix
@@ -572,7 +632,8 @@ sub num_oidbin ($$$) {
 SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
 EOM
 	$sth->execute($fid, $uid);
-	map { $_->[0] } @{$sth->fetchall_arrayref};
+	my %uniq; # for public-inbox <= 1.7.0
+	grep { !$uniq{$_}++ } map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
 sub name_oidbin ($$$) {
@@ -581,8 +642,14 @@ sub name_oidbin ($$$) {
 	my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
 SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
 EOM
+	$sth->bind_param(1, $fid);
+	$sth->bind_param(2, $nm, SQL_BLOB);
+	$sth->execute;
+	my @bin = map { $_->[0] } @{$sth->fetchall_arrayref};
 	$sth->execute($fid, $nm);
-	map { $_->[0] } @{$sth->fetchall_arrayref};
+	my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
+	my %uniq; # for public-inbox <= 1.7.0
+	grep { !$uniq{$_}++ } (@bin, @old);
 }
 
 sub imap_oidhex {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary
  2022-04-02  1:13 ` [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary Eric Wong
@ 2022-04-02 23:45   ` Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2022-04-02 23:45 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> Apparently leaving {sqlite_unicode} unset isn't enough, and
> there's subtle differences where BLOBs are stored differently
> than TEXT when dealing with binary data.  We also want to avoid
> odd cases where SQLite will attempt to treat a number-like value
> as an integer.
> 
> This should avoid problems in case non-UTF-8 URLs and pathnames are
> used.  They'll automatically be upgraded if not, but downgrades
> to older lei would cause duplicates to appear.

Ugh, no, not yet.  "lei q" and "lei up" seem to work fine, but
"lei import" is screwed up :x

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-04-02 23:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-02  1:13 [PATCH 0/2] lei_mail_sync ambiguity fixes Eric Wong
2022-04-02  1:13 ` [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary Eric Wong
2022-04-02 23:45   ` Eric Wong
2022-04-02  1:13 ` [PATCH 2/2] lei_mail_sync: store OIDs and Maildir filenames as blobs Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).