From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 32CA61F4D0 for ; Wed, 4 Dec 2024 19:39:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1733341155; bh=AeorCUwXC6K6EHN/MEAsGalKCd1CneqdgKgzMUpvFP4=; h=From:To:Subject:Date:In-Reply-To:References:From; b=EQ4k2Lmqx/FhI8gYzJp1rInfNpE4ydSVDqnIJM70K6YF+LdQtKarEREpkJ5sbgCLY ZzX5OmiDaDKdT9maTrpIeqtwfx5O5534V7y+yWczC6tRPwaq4vNm0TyaNWjdU/y0FO REti9dL12cB1Ne9884r1/VKF69gdvOK76MVRMqjk= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/3] sqlite: avoid incorrect/deprecated `LIKE' use Date: Wed, 4 Dec 2024 19:39:12 +0000 Message-ID: <20241204193914.3227868-2-e@80x24.org> In-Reply-To: <20241204193914.3227868-1-e@80x24.org> References: <20241204193914.3227868-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The `case_sensitive_like' pragma is deprecated since SQLite 3.44+. Furthermore, our use of `LIKE' in SharedKV->keys seemed to be broken anyways since `LIKE' doesn't seem to work with binary data (stored with SQL_BLOB), but neither does `GLOB'. So avoid `LIKE' entirely. For non-SQL_BLOB data we'll favor the always-case-sensitive GLOB. For SQL_BLOB data, we must rely on the Perl regexp engine from what I can tell. `GLOB' is preferred where possible since SQLite will be able to use indices in some cases whereas `REGEXP' cannot. Fixing SharedKV->keys should improve bash completion for lei. Some common SQLite-related utilities are now in a PublicInbox::SQLiteUtil package which will be expanded to deal with more commonalities between SQLite users in our tree. --- MANIFEST | 2 ++ lib/PublicInbox/ExtSearchIdx.pm | 12 ++++---- lib/PublicInbox/LeiMailSync.pm | 14 +++------- lib/PublicInbox/SQLiteUtil.pm | 30 ++++++++++++++++++++ lib/PublicInbox/SharedKV.pm | 23 ++++++---------- t/shared_kv.t | 5 ++++ t/sqlite_util.t | 49 +++++++++++++++++++++++++++++++++ 7 files changed, 105 insertions(+), 30 deletions(-) create mode 100644 lib/PublicInbox/SQLiteUtil.pm create mode 100644 t/sqlite_util.t diff --git a/MANIFEST b/MANIFEST index 1305ed8a..b0b4f71c 100644 --- a/MANIFEST +++ b/MANIFEST @@ -334,6 +334,7 @@ lib/PublicInbox/RepoList.pm lib/PublicInbox/RepoSnapshot.pm lib/PublicInbox/RepoTree.pm lib/PublicInbox/SHA.pm +lib/PublicInbox/SQLiteUtil.pm lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/SaPlugin/ListMirror.pod lib/PublicInbox/Search.pm @@ -614,6 +615,7 @@ t/solve/bare.patch t/solver_git.t t/spamcheck_spamc.t t/spawn.t +t/sqlite_util.t t/syscall.t t/tail_notify.t t/thread-cycle.t diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index cead0f8a..d8db7d4b 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -21,6 +21,7 @@ use Carp qw(croak carp); use Scalar::Util qw(blessed); use Sys::Hostname qw(hostname); use File::Glob qw(bsd_glob GLOB_NOSORT); +use PublicInbox::SQLiteUtil; use PublicInbox::Isearch; use PublicInbox::MultiGit; use PublicInbox::Spawn (); @@ -461,15 +462,14 @@ EOM DELETE FROM inboxes WHERE ibx_id = ? # drop last_commit info - my $pat = $eidx_key; - $pat =~ s/([_%\\])/\\$1/g; - $self->{oidx}->dbh->do('PRAGMA case_sensitive_like = ON'); + # We use GLOB in addition to REGEXP since GLOB can use indices my $lc_i = $self->{oidx}->dbh->prepare(<<''); -SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ? +SELECT key FROM eidx_meta WHERE key GLOB ? AND key REGEXP ? - $lc_i->execute("lc-%:$pat//%", '\\'); + my $ekg = 'lc-v[1-9]*:'. + PublicInbox::SQLiteUtil::escape_glob($eidx_key).'//*'; + $lc_i->execute($ekg, qr!\Alc-v[1-9]+:\Q$eidx_key\E//!); while (my ($key) = $lc_i->fetchrow_array) { - next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!; warn "# removing $key\n"; $self->{oidx}->dbh->do(<<'', undef, $key); DELETE FROM eidx_meta WHERE key = ? diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index d0f6d7b4..d23f6b4b 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -12,6 +12,7 @@ use PublicInbox::ContentHash qw(git_sha); use Carp (); use PublicInbox::Git qw(%HEXLEN2SHA); use PublicInbox::IO qw(read_all); +use PublicInbox::SQLiteUtil; sub dbh_new { my ($self) = @_; @@ -31,7 +32,6 @@ sub dbh_new { # no sqlite_unicode, here, all strings are binary create_tables($self, $dbh); $dbh->do('PRAGMA journal_mode = WAL') if $creat; - $dbh->do('PRAGMA case_sensitive_like = ON'); $dbh; } @@ -421,18 +421,12 @@ sub locations_for { # returns a list of folders used for completion sub folders { - my ($self, @pfx) = @_; + my ($self, $pfx, $anywhere) = @_; my $sql = 'SELECT loc FROM folders'; my $re; - if (defined($pfx[0])) { + if (defined $pfx) { $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre - if (ref($pfx[0])) { # assume qr// "Regexp" - $re = $pfx[0]; - } else { - $re = !!$pfx[1] ? '.*' : ''; - $re .= quotemeta($pfx[0]); - $re .= '.*'; - } + $re = PublicInbox::SQLiteUtil::mk_sqlite_re $pfx, $anywhere; } my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql); $sth->bind_param(1, $re) if defined($re); diff --git a/lib/PublicInbox/SQLiteUtil.pm b/lib/PublicInbox/SQLiteUtil.pm new file mode 100644 index 00000000..68e0726d --- /dev/null +++ b/lib/PublicInbox/SQLiteUtil.pm @@ -0,0 +1,30 @@ +# Copyright (C) all contributors +# License: AGPL-3.0+ + +# common bits for SQLite users in our codebase +package PublicInbox::SQLiteUtil; +use v5.12; + +my %SQLITE_GLOB_MAP = ( + '[' => '[[]', + ']' => '[]]', + '*' => '[*]', + '?' => '[?]' +); + +# n.b. GLOB doesn't seem to work on data inserted w/ SQL_BLOB +sub escape_glob ($) { + my ($s) = @_; + $s =~ s/([\[\]\*\?])/$SQLITE_GLOB_MAP{$1}/sge; + $s; +} + +# DBD::SQLite maps REGEXP to use perlre, and that works on SQL_BLOB +# whereas GLOB and LIKE don't seem to... +sub mk_sqlite_re ($$) { + my ($pfx, $anywhere) = @_; + ref($pfx) ? $pfx # assume qr// Regexp + : ($anywhere ? '.*' : '^')."\Q$pfx\E.*"; +} + +1; diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm index 89ab3f74..51ece48d 100644 --- a/lib/PublicInbox/SharedKV.pm +++ b/lib/PublicInbox/SharedKV.pm @@ -12,6 +12,7 @@ use File::Temp qw(tempdir); use DBI qw(:sql_types); # SQL_BLOB use PublicInbox::Spawn; use File::Path qw(rmtree); +use PublicInbox::SQLiteUtil; sub dbh { my ($self, $lock) = @_; @@ -79,23 +80,17 @@ SELECT k,v FROM kv } sub keys { - my ($self, @pfx) = @_; + my ($self, $pfx, $anywhere) = @_; + # n.b. can't use GLOB for index optimization due to SQL_BLOB, + # so regexps it is. my $sql = 'SELECT k FROM kv'; - if (defined $pfx[0]) { - $sql .= ' WHERE k LIKE ? ESCAPE ?'; - my $anywhere = !!$pfx[1]; - $pfx[1] = '\\'; - $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars - $pfx[0] .= '%'; - substr($pfx[0], 0, 0, '%') if $anywhere; - } else { - @pfx = (); # [0] may've been undef + my $re; + if (defined $pfx) { + $sql .= ' WHERE k REGEXP ?'; # DBD::SQLite uses perlre + $re = PublicInbox::SQLiteUtil::mk_sqlite_re $pfx, $anywhere; } my $sth = $self->dbh->prepare($sql); - if (@pfx) { - $sth->bind_param(1, $pfx[0], SQL_BLOB); - $sth->bind_param(2, $pfx[1]); - } + $sth->bind_param(1, $re) if defined $re; $sth->execute; map { $_->[0] } @{$sth->fetchall_arrayref}; } diff --git a/t/shared_kv.t b/t/shared_kv.t index 8dfd3b25..aa1ce4e3 100644 --- a/t/shared_kv.t +++ b/t/shared_kv.t @@ -46,4 +46,9 @@ $skv->dbh; ok($skv->set_maybe('02', '2'), "`02' set"); ok($skv->set_maybe('2', '2'), "`2' set (no match on `02')"); +my @k = $skv->keys('2'); +is_deeply \@k, [ '2' ], 'prefix match on ->keys'; +@k = sort $skv->keys('2', 1); +is_deeply \@k, [ '02', '2' ], 'anywhere match on ->keys'; + done_testing; diff --git a/t/sqlite_util.t b/t/sqlite_util.t new file mode 100644 index 00000000..f41285e0 --- /dev/null +++ b/t/sqlite_util.t @@ -0,0 +1,49 @@ +#!perl -w +# Copyright (C) all contributors +# License: AGPL-3.0+ +use v5.12; +use PublicInbox::TestCommon; +require_mods 'DBD::SQLite'; +use_ok 'PublicInbox::SQLiteUtil'; +require DBI; +DBI->import(':sql_types'); + +my $dbh = DBI->connect('dbi:SQLite:dbname=:memory:', '', '', { + AutoCommit => 1, + RaiseError => 1, + PrintError => 0, + sqlite_use_immediate_transaction => 1, +}); + +$dbh->do('CREATE TABLE test (key BLOB NOT NULL, UNIQUE (key))'); + +my $ins = $dbh->prepare('INSERT INTO test (key) VALUES (?)'); +my $sel = $dbh->prepare('SELECT key FROM test WHERE key GLOB ?'); +my $non_utf8 = "h\x{e5}llo[wor]ld!"; +my $us_ascii = 'h*llo[wor]ld?'; + +$dbh->begin_work; +my @SQL_BLOB = (SQL_BLOB()); +@SQL_BLOB = (); # FIXME: can't get GLOB to work w/ SQL_BLOB +for my $k ($us_ascii, $non_utf8) { + $ins->bind_param(1, $k, @SQL_BLOB); + $ins->execute; +} +$dbh->commit; + +$sel->bind_param(1, '*', @SQL_BLOB); +$sel->execute; +my $rows = $sel->fetchall_arrayref; +is scalar(@$rows), 2, q[`*' got everything]; + +$sel->bind_param(1, PublicInbox::SQLiteUtil::escape_glob($us_ascii), @SQL_BLOB); +$sel->execute; +$rows = $sel->fetchall_arrayref; +is_deeply $rows, [ [ $us_ascii ] ], 'US-ASCII exact match'; + +$sel->bind_param(1, PublicInbox::SQLiteUtil::escape_glob($non_utf8), @SQL_BLOB); +$sel->execute; +$rows = $sel->fetchall_arrayref; +is_deeply $rows, [ [ $non_utf8 ] ], 'ISO-8859-1 exact match'; + +done_testing;