unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary
Date: Sat,  2 Apr 2022 01:13:51 +0000	[thread overview]
Message-ID: <20220402011352.30964-2-e@80x24.org> (raw)
In-Reply-To: <20220402011352.30964-1-e@80x24.org>

Apparently leaving {sqlite_unicode} unset isn't enough, and
there are subtle differences where BLOBs are stored differently
than TEXT when dealing with binary data.  We also want to avoid
odd cases where SQLite will attempt to treat a number-like value
as an integer.

This should avoid problems in case non-UTF-8 URLs and pathnames are
used.  Entries not already stored as binary will automatically be
upgraded, but downgrades to older lei would cause duplicates to appear.
---
 lib/PublicInbox/LeiMailSync.pm | 75 +++++++++++++++++++++++-----------
 t/lei_mail_sync.t              |  5 ++-
 2 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 182b0c22..d93a5810 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -6,7 +6,7 @@ package PublicInbox::LeiMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
-use DBI;
+use DBI qw(:sql_types); # SQL_BLOB
 use PublicInbox::ContentHash qw(git_sha);
 use Carp ();
 
@@ -90,29 +90,55 @@ CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
 
 }
 
+# used to fixup pre-1.7.0 folders
+sub update_fid ($$$) {
+	my ($dbh, $fid, $loc) = @_;
+	my $sth = $dbh->prepare(<<'');
+UPDATE folders SET loc = ? WHERE fid = ?
+
+	$sth->bind_param(1, $loc, SQL_BLOB);
+	$sth->bind_param(2, $fid);
+	$sth->execute;
+}
+
+sub get_fid ($$$) {
+	my ($sth, $folder, $dbh) = @_; # $dbh is set iff RW
+	$sth->bind_param(1, $folder, SQL_BLOB);
+	$sth->execute;
+	my ($fid) = $sth->fetchrow_array;
+	if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
+		$dbh->do('DELETE FROM folders WHERE loc = ? AND fid != ?',
+			undef, $folder, $fid) if defined($dbh);
+	} else {
+		$sth->execute($folder); # fixup old stuff
+		($fid) = $sth->fetchrow_array;
+		update_fid($dbh, $fid, $folder) if defined($fid) && $dbh;
+	}
+	$fid;
+}
+
 sub fid_for {
 	my ($self, $folder, $rw) = @_;
 	my $dbh = $self->{dbh} //= dbh_new($self, $rw);
-	my $sel = 'SELECT fid FROM folders WHERE loc = ? LIMIT 1';
-	my ($fid) = $dbh->selectrow_array($sel, undef, $folder);
-	return $fid if defined $fid;
+	my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+	my $rw_dbh = $rw ? $dbh : undef;
+	my $fid = get_fid($sth, $folder, $rw_dbh);
+	return $fid if defined($fid);
 
 	# caller had trailing slash (LeiToMail)
 	if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
-		($fid) = $dbh->selectrow_array($sel, undef, $folder);
+		$fid = get_fid($sth, $folder, $rw_dbh);
 		if (defined $fid) {
-			$dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+			update_fid($dbh, $fid, $folder) if $rw;
 			return $fid;
 		}
 	# sometimes we stored trailing slash..
 	} elsif ($folder =~ m!\A(?:maildir|mh):!i) {
-		($fid) = $dbh->selectrow_array($sel, undef, "$folder/");
+		$fid = get_fid($sth, $folder, $rw_dbh);
 		if (defined $fid) {
-			$dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+			update_fid($dbh, $fid, $folder) if $rw;
 			return $fid;
 		}
 	} elsif ($rw && $folder =~ m!\Aimaps?://!i) {
@@ -129,8 +155,10 @@ EOM
 	$dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
 	$dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);
 
-	my $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
-	$sth->execute($fid, $folder);
+	$sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
+	$sth->bind_param(1, $fid);
+	$sth->bind_param(2, $folder, SQL_BLOB);
+	$sth->execute;
 
 	$fid;
 }
@@ -306,18 +334,17 @@ sub locations_for {
 sub folders {
 	my ($self, @pfx) = @_;
 	my $sql = 'SELECT loc FROM folders';
+	my $re;
 	if (defined($pfx[0])) {
-		$sql .= ' WHERE loc LIKE ? ESCAPE ?';
-		my $anywhere = !!$pfx[1];
-		$pfx[1] = '\\';
-		$pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
-		$pfx[0] .= '%';
-		substr($pfx[0], 0, 0, '%') if $anywhere;
-	} else {
-		@pfx = (); # [0] may've been undef
+		$sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
+		$re = !!$pfx[1] ? '.*' : '';
+		$re .= quotemeta($pfx[0]);
+		$re .= '.*';
 	}
-	my $dbh = $self->{dbh} //= dbh_new($self);
-	map { $_->[0] } @{$dbh->selectall_arrayref($sql, undef, @pfx)};
+	my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
+	$sth->bind_param(1, $re) if defined($re);
+	$sth->execute;
+	map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
 sub local_blob {
diff --git a/t/lei_mail_sync.t b/t/lei_mail_sync.t
index 4439b818..74a6c8aa 100644
--- a/t/lei_mail_sync.t
+++ b/t/lei_mail_sync.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
 use v5.10.1;
@@ -19,6 +19,8 @@ my $deadbeef = "\xde\xad\xbe\xef";
 is($lms->set_src($deadbeef, $imap, 1), 1, 'set IMAP once');
 ok($lms->set_src($deadbeef, $imap, 1) == 0, 'set IMAP idempotently');
 is_deeply([$ro->folders], [$imap], 'IMAP folder added');
+note explain([$ro->folders($imap)]);
+note explain([$imap, [$ro->folders]]);
 is_deeply([$ro->folders($imap)], [$imap], 'IMAP folder with full GLOB');
 is_deeply([$ro->folders('imaps://bob@[::1]/INBOX')], [$imap],
 		'IMAP folder with partial GLOB');
@@ -37,6 +39,7 @@ is_deeply($ro->locations_for($deadbeef),
 
 if ('mess things up pretend old bug') {
 	$lms->lms_write_prepare;
+	diag "messing things up";
 	$lms->{dbh}->do('UPDATE folders SET loc = ? WHERE loc = ?', undef,
 			"$maildir/", $maildir);
 	ok(delete $lms->{fmap}, 'clear folder map');

  reply	other threads:[~2022-04-02  1:13 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-02  1:13 [PATCH 0/2] lei_mail_sync ambiguity fixes Eric Wong
2022-04-02  1:13 ` Eric Wong [this message]
2022-04-02 23:45   ` [PATCH 1/2] lei_mail_sync: ensure URLs and folder names are stored as binary Eric Wong
2022-04-02  1:13 ` [PATCH 2/2] lei_mail_sync: store OIDs and Maildir filenames as blobs Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220402011352.30964-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).