From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D9BD91F9FE for ; Fri, 23 Jul 2021 10:56:12 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/3] lei: avoid SQLite COUNT() for dedupe Date: Fri, 23 Jul 2021 10:56:11 +0000 Message-Id: <20210723105612.3883-3-e@80x24.org> In-Reply-To: <20210723105612.3883-1-e@80x24.org> References: <20210723105612.3883-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: SQLite COUNT() is a slow operation that does a full table scan with no conditions. There's no need for it, since lei dedupe only needs to know if it's empty or not to decide between new/ and cur/ for Maildir outputs. --- lib/PublicInbox/LeiDedupe.pm | 5 ++--- lib/PublicInbox/LeiSavedSearch.pm | 8 ++++---- lib/PublicInbox/LeiToMail.pm | 4 ++-- lib/PublicInbox/LeiXSearch.pm | 2 +- lib/PublicInbox/SharedKV.pm | 7 +++++++ 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index ed52e417..32f99cd0 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -127,10 +127,9 @@ sub pause_dedupe { delete($skv->{dbh}) if $skv; } -sub dedupe_nr { +sub has_entries { my $skv = $_[0]->[0] or return undef; - my @n = $skv->count; - $n[0]; + $skv->has_entries; } 1; diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm index 929380ed..cfbf68c3 100644 --- a/lib/PublicInbox/LeiSavedSearch.pm +++ b/lib/PublicInbox/LeiSavedSearch.pm @@ -315,11 +315,11 @@ E: rename($dir_old, $dir_new) error: $! EOM } -# cf. LeiDedupe->dedupe_nr -sub dedupe_nr { +# cf. 
LeiDedupe->dedupe_nr -sub dedupe_nr { +# cf. LeiDedupe->has_entries +sub has_entries { my $oidx = $_[0]->{oidx} // die 'BUG: no {oidx}'; - my @n = $oidx->{dbh}->selectrow_array('SELECT COUNT(*) FROM over'); - $n[0]; + my @n = $oidx->{dbh}->selectrow_array('SELECT num FROM over LIMIT 1'); + scalar(@n) ? 1 : undef; } no warnings 'once'; diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index b9405c0c..d782d4c7 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -198,7 +198,7 @@ sub _mbox_write_cb ($$) { my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe; my $lse = $lei->{lse}; # may be undef - my $set_recent = $dedupe->dedupe_nr; + my $set_recent = $dedupe->has_entries; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($buf); @@ -293,7 +293,7 @@ sub _maildir_write_cb ($$) { # Favor cur/ and only write to new/ when augmenting. This # saves MUAs from having to do a mass rename when the initial # search result set is huge. - my $dir = $dedupe && $dedupe->dedupe_nr ? 'new/' : 'cur/'; + my $dir = $dedupe && $dedupe->has_entries ? 'new/' : 'cur/'; sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index cac7fb7d..3414e87d 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -504,7 +504,7 @@ sub do_query { my $F_SETPIPE_SZ = $^O eq 'linux' ? 
1031 : undef; if ($l2m->{-wq_nr_workers} > 1 && $l2m->{base_type} =~ /\A(?:maildir|mbox)\z/) { - # setup two barriers to coordinate dedupe_nr + # setup two barriers to coordinate ->has_entries # between l2m workers pipe(my ($a_r, $a_w)) or die "pipe: $!"; fcntl($a_r, $F_SETPIPE_SZ, 4096) if $F_SETPIPE_SZ; diff --git a/lib/PublicInbox/SharedKV.pm b/lib/PublicInbox/SharedKV.pm index 8347b195..3487e820 100644 --- a/lib/PublicInbox/SharedKV.pm +++ b/lib/PublicInbox/SharedKV.pm @@ -154,6 +154,13 @@ SELECT COUNT(k) FROM kv $sth->fetchrow_array; } +# faster than ->count due to how SQLite works +sub has_entries { + my ($self) = @_; + my @n = $self->{dbh}->selectrow_array('SELECT k FROM kv LIMIT 1'); + scalar(@n) ? 1 : undef; +} + sub dbh_release { my ($self, $lock) = @_; my $dbh = delete $self->{dbh} or return;