From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] sqlite: avoid incorrect/deprecated `LIKE' use
Date: Wed, 4 Dec 2024 19:39:12 +0000 [thread overview]
Message-ID: <20241204193914.3227868-2-e@80x24.org> (raw)
In-Reply-To: <20241204193914.3227868-1-e@80x24.org>
The `case_sensitive_like' pragma is deprecated since
SQLite 3.44+. Furthermore, our use of `LIKE' in SharedKV->keys
seemed to be broken anyways since `LIKE' doesn't seem to
work with binary data (stored with SQL_BLOB), but neither does
`GLOB'.
So avoid `LIKE' entirely. For non-SQL_BLOB data we'll favor the
always-case-sensitive GLOB. For SQL_BLOB data, we must rely on
the Perl regexp engine from what I can tell. `GLOB' is
preferred where possible since SQLite will be able to use
indices in some cases whereas `REGEXP' cannot.
Fixing SharedKV->keys should improve bash completion for lei.
Some common SQLite-related utilities are now in a
PublicInbox::SQLiteUtil package which will be expanded to deal
with more commonalities between SQLite users in our tree.
---
MANIFEST | 2 ++
lib/PublicInbox/ExtSearchIdx.pm | 12 ++++----
lib/PublicInbox/LeiMailSync.pm | 14 +++-------
lib/PublicInbox/SQLiteUtil.pm | 30 ++++++++++++++++++++
lib/PublicInbox/SharedKV.pm | 23 ++++++----------
t/shared_kv.t | 5 ++++
t/sqlite_util.t | 49 +++++++++++++++++++++++++++++++++
7 files changed, 105 insertions(+), 30 deletions(-)
create mode 100644 lib/PublicInbox/SQLiteUtil.pm
create mode 100644 t/sqlite_util.t
diff --git a/MANIFEST b/MANIFEST
index 1305ed8a..b0b4f71c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -334,6 +334,7 @@ lib/PublicInbox/RepoList.pm
lib/PublicInbox/RepoSnapshot.pm
lib/PublicInbox/RepoTree.pm
lib/PublicInbox/SHA.pm
+lib/PublicInbox/SQLiteUtil.pm
lib/PublicInbox/SaPlugin/ListMirror.pm
lib/PublicInbox/SaPlugin/ListMirror.pod
lib/PublicInbox/Search.pm
@@ -614,6 +615,7 @@ t/solve/bare.patch
t/solver_git.t
t/spamcheck_spamc.t
t/spawn.t
+t/sqlite_util.t
t/syscall.t
t/tail_notify.t
t/thread-cycle.t
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index cead0f8a..d8db7d4b 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -21,6 +21,7 @@ use Carp qw(croak carp);
use Scalar::Util qw(blessed);
use Sys::Hostname qw(hostname);
use File::Glob qw(bsd_glob GLOB_NOSORT);
+use PublicInbox::SQLiteUtil;
use PublicInbox::Isearch;
use PublicInbox::MultiGit;
use PublicInbox::Spawn ();
@@ -461,15 +462,14 @@ EOM
DELETE FROM inboxes WHERE ibx_id = ?
# drop last_commit info
- my $pat = $eidx_key;
- $pat =~ s/([_%\\])/\\$1/g;
- $self->{oidx}->dbh->do('PRAGMA case_sensitive_like = ON');
+ # We use GLOB in addition to REGEXP since GLOB can use indices
my $lc_i = $self->{oidx}->dbh->prepare(<<'');
-SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ?
+SELECT key FROM eidx_meta WHERE key GLOB ? AND key REGEXP ?
- $lc_i->execute("lc-%:$pat//%", '\\');
+ my $ekg = 'lc-v[1-9]*:'.
+ PublicInbox::SQLiteUtil::escape_glob($eidx_key).'//*';
+ $lc_i->execute($ekg, qr!\Alc-v[1-9]+:\Q$eidx_key\E//!);
while (my ($key) = $lc_i->fetchrow_array) {
- next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!;
warn "# removing $key\n";
$self->{oidx}->dbh->do(<<'', undef, $key);
DELETE FROM eidx_meta WHERE key = ?
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index d0f6d7b4..d23f6b4b 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -12,6 +12,7 @@ use PublicInbox::ContentHash qw(git_sha);
use Carp ();
use PublicInbox::Git qw(%HEXLEN2SHA);
use PublicInbox::IO qw(read_all);
+use PublicInbox::SQLiteUtil;
sub dbh_new {
my ($self) = @_;
@@ -31,7 +32,6 @@ sub dbh_new {
# no sqlite_unicode, here, all strings are binary
create_tables($self, $dbh);
$dbh->do('PRAGMA journal_mode = WAL') if $creat;
- $dbh->do('PRAGMA case_sensitive_like = ON');
$dbh;
}
@@ -421,18 +421,12 @@ sub locations_for {
# returns a list of folders used for completion
sub folders {
- my ($self, @pfx) = @_;
+ my ($self, $pfx, $anywhere) = @_;
my $sql = 'SELECT loc FROM folders';
my $re;
- if (defined($pfx[0])) {
+ if (defined $pfx) {
$sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
- if (ref($pfx[0])) { # assume qr// "Regexp"
- $re = $pfx[0];
- } else {
- $re = !!$pfx[1] ? '.*' : '';
- $re .= quotemeta($pfx[0]);
- $re .= '.*';
- }
+ $re = PublicInbox::SQLiteUtil::mk_sqlite_re $pfx, $anywhere;
}
my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
$sth->bind_param(1, $re) if defined($re);
diff --git a/lib/PublicInbox/SQLiteUtil.pm b/lib/PublicInbox/SQLiteUtil.pm
new file mode 100644
index 00000000..68e0726d
--- /dev/null
+++ b/lib/PublicInbox/SQLiteUtil.pm
@@ -0,0 +1,30 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common bits for SQLite users in our codebase
+package PublicInbox::SQLiteUtil;
+use v5.12;
+
+my %SQLITE_GLOB_MAP = (
+ '[' => '[[]',
+ ']' => '[]]',
+ '*' => '[*]',
+ '?' => '[?]'
+);
+
+# n.b. GLOB doesn't seem to work on data inserted w/ SQL_BLOB
+sub escape_glob ($) {
+ my ($s) = @_;
+ $s =~ s/([\[\]\*\?])/$SQLITE_GLOB_MAP{$1}/sge;
+ $s;
+}
+
+# DBD::SQLite maps REGEXP to use perlre, and that works on SQL_BLOB
+# whereas GLOB and LIKE don't seem to...
+sub mk_sqlite_re ($$) {
+ my ($pfx, $anywhere) = @_;
+ ref($pfx) ? $pfx # assume qr// Regexp
+ : ($anywhere ? '.*' : '^')."\Q$pfx\E.*";
+}
+
+1;
diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm
index 89ab3f74..51ece48d 100644
--- a/lib/PublicInbox/SharedKV.pm
+++ b/lib/PublicInbox/SharedKV.pm
@@ -12,6 +12,7 @@ use File::Temp qw(tempdir);
use DBI qw(:sql_types); # SQL_BLOB
use PublicInbox::Spawn;
use File::Path qw(rmtree);
+use PublicInbox::SQLiteUtil;
sub dbh {
my ($self, $lock) = @_;
@@ -79,23 +80,17 @@ SELECT k,v FROM kv
}
sub keys {
- my ($self, @pfx) = @_;
+ my ($self, $pfx, $anywhere) = @_;
+ # n.b. can't use GLOB for index optimization due to SQL_BLOB,
+ # so regexps it is.
my $sql = 'SELECT k FROM kv';
- if (defined $pfx[0]) {
- $sql .= ' WHERE k LIKE ? ESCAPE ?';
- my $anywhere = !!$pfx[1];
- $pfx[1] = '\\';
- $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
- $pfx[0] .= '%';
- substr($pfx[0], 0, 0, '%') if $anywhere;
- } else {
- @pfx = (); # [0] may've been undef
+ my $re;
+ if (defined $pfx) {
+ $sql .= ' WHERE k REGEXP ?'; # DBD::SQLite uses perlre
+ $re = PublicInbox::SQLiteUtil::mk_sqlite_re $pfx, $anywhere;
}
my $sth = $self->dbh->prepare($sql);
- if (@pfx) {
- $sth->bind_param(1, $pfx[0], SQL_BLOB);
- $sth->bind_param(2, $pfx[1]);
- }
+ $sth->bind_param(1, $re) if defined $re;
$sth->execute;
map { $_->[0] } @{$sth->fetchall_arrayref};
}
diff --git a/t/shared_kv.t b/t/shared_kv.t
index 8dfd3b25..aa1ce4e3 100644
--- a/t/shared_kv.t
+++ b/t/shared_kv.t
@@ -46,4 +46,9 @@ $skv->dbh;
ok($skv->set_maybe('02', '2'), "`02' set");
ok($skv->set_maybe('2', '2'), "`2' set (no match on `02')");
+my @k = $skv->keys('2');
+is_deeply \@k, [ '2' ], 'prefix match on ->keys';
+@k = sort $skv->keys('2', 1);
+is_deeply \@k, [ '02', '2' ], 'anywhere match on ->keys';
+
done_testing;
diff --git a/t/sqlite_util.t b/t/sqlite_util.t
new file mode 100644
index 00000000..f41285e0
--- /dev/null
+++ b/t/sqlite_util.t
@@ -0,0 +1,49 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12;
+use PublicInbox::TestCommon;
+require_mods 'DBD::SQLite';
+use_ok 'PublicInbox::SQLiteUtil';
+require DBI;
+DBI->import(':sql_types');
+
+my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:', '', '', {
+ AutoCommit => 1,
+ RaiseError => 1,
+ PrintError => 0,
+ sqlite_use_immediate_transaction => 1,
+});
+
+$dbh->do('CREATE TABLE test (key BLOB NOT NULL, UNIQUE (key))');
+
+my $ins = $dbh->prepare('INSERT INTO test (key) VALUES (?)');
+my $sel = $dbh->prepare('SELECT key FROM test WHERE key GLOB ?');
+my $non_utf8 = "h\x{e5}llo[wor]ld!";
+my $us_ascii = 'h*llo[wor]ld?';
+
+$dbh->begin_work;
+my @SQL_BLOB = (SQL_BLOB());
+@SQL_BLOB = (); # FIXME: can't get GLOB to work w/ SQL_BLOB
+for my $k ($us_ascii, $non_utf8) {
+ $ins->bind_param(1, $k, @SQL_BLOB);
+ $ins->execute;
+}
+$dbh->commit;
+
+$sel->bind_param(1, '*', @SQL_BLOB);
+$sel->execute;
+my $rows = $sel->fetchall_arrayref;
+is scalar(@$rows), 2, q[`*' got everything];
+
+$sel->bind_param(1, PublicInbox::SQLiteUtil::escape_glob($us_ascii), @SQL_BLOB);
+$sel->execute;
+$rows = $sel->fetchall_arrayref;
+is_deeply $rows, [ [ $us_ascii ] ], 'US-ASCII exact match';
+
+$sel->bind_param(1, PublicInbox::SQLiteUtil::escape_glob($non_utf8), @SQL_BLOB);
+$sel->execute;
+$rows = $sel->fetchall_arrayref;
+is_deeply $rows, [ [ $non_utf8 ] ], 'ISO-8859-1 exact match';
+
+done_testing;
next prev parent reply other threads:[~2024-12-04 19:39 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-04 19:39 [PATCH 0/3] some SQLite-related things Eric Wong
2024-12-04 19:39 ` Eric Wong [this message]
2024-12-04 19:39 ` [PATCH 2/3] sqlite: use `BLOB' column type instead of `VARBINARY' Eric Wong
2024-12-04 19:39 ` [PATCH 3/3] SQLiteUtil: hoist out common create_db code Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241204193914.3227868-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).