From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 756E91FA10 for ; Wed, 26 Aug 2020 08:17:43 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/5] over: recent: remove expensive COUNT query Date: Wed, 26 Aug 2020 08:17:40 +0000 Message-Id: <20200826081742.16642-4-e@yhbt.net> In-Reply-To: <20200826081742.16642-1-e@yhbt.net> References: <20200826081742.16642-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: As noted in commit 87dca6d8d5988c5eb54019cca342450b0b7dd6b7 ("www: rework query responses to avoid COUNT in SQLite"), COUNT on many rows is expensive on big SQLite DBs. We've already stopped using that code path long ago in WWW while -imapd and -nntpd never used it. So we'll adjust our remaining test cases to not need it, either. --- lib/PublicInbox/Over.pm | 8 +------- t/indexlevels-mirror.t | 29 +++++++++++++++-------------- t/v2writable.t | 4 +++- t/watch_maildir_v2.t | 30 +++++++++++++++--------------- 4 files changed, 34 insertions(+), 37 deletions(-) diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index a2cf9f21..6b7d5216 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -244,15 +244,9 @@ sub recent { $s = '+num > 0 ORDER BY ts DESC'; } } - my $msgs = do_get($self, <<"", $opts, @v); + do_get($self, <<"", $opts, @v); SELECT ts,ds,ddd FROM over WHERE $s - return $msgs unless wantarray; - - my $nr = $self->{dbh}->selectrow_array(<<''); -SELECT COUNT(num) FROM over WHERE num > 0 - - ($nr, $msgs); } sub get_art { diff --git a/t/indexlevels-mirror.t b/t/indexlevels-mirror.t index 859c2c17..27533546 100644 --- a/t/indexlevels-mirror.t +++ b/t/indexlevels-mirror.t @@ -49,8 +49,8 @@ my $import_index_incremental = sub { inboxdir => $ibx->{inboxdir}, indexlevel => $level }); - my ($nr, $msgs) = $ro_master->recent; - is($nr, 1, 'only one message in master, so far'); + my $msgs = $ro_master->recent; + is(scalar(@$msgs), 1, 'only one message in master, so far'); is($msgs->[0]->{mid}, 'm@1', 'first message in master indexed'); # clone @@ -79,8 +79,8 @@ my $import_index_incremental = sub { inboxdir => $mirror, indexlevel => $level, }); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 1, 'only one message, so far'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 1, 'only one message, so far'); is($msgs->[0]->{mid}, 'm@1', 'read first message'); # update master @@ -91,16 +91,16 @@ my $import_index_incremental = sub { # mirror updates is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 2, '2nd message seen in mirror'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 2, '2nd message seen in mirror'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in mirror'); # incremental index master (required for v1) ok(run_script([qw(-index -j0), $ibx->{inboxdir}, "-L$level"]), 'index master OK'); - ($nr, $msgs) = $ro_master->recent; - is($nr, 2, '2nd message seen in master'); + $msgs = $ro_master->recent; + is(scalar(@$msgs), 2, '2nd message seen in master'); is_deeply([sort { $a cmp $b } map { $_->{mid} } @$msgs], ['m@1','m@2'], 'got both messages in master'); @@ -121,15 +121,15 @@ my $import_index_incremental = sub { is(PublicInbox::Admin::detect_indexlevel($ro_mirror), $level, 'indexlevel detectable by Admin after xcpdb v' .$v.$level); delete $ro_mirror->{$_} for (qw(over search)); - ($nr, $msgs) = $ro_mirror->search->query('m:m@2'); - is($nr, 1, "v$v found m\@2 via Xapian on $level"); + $msgs = $ro_mirror->search->query('m:m@2'); + is(scalar(@$msgs), 1, "v$v found m\@2 via Xapian on $level"); } # sync the mirror is(xsys('git', "--git-dir=$fetch_dir", qw(fetch -q)), 0, 'fetch OK'); ok(run_script([qw(-index -j0), $mirror]), "v$v index mirror again OK"); - ($nr, $msgs) = $ro_mirror->recent; - is($nr, 1, '2nd message gone from mirror'); + $msgs = $ro_mirror->recent; + is(scalar(@$msgs), 1, '2nd message gone from mirror'); is_deeply([map { $_->{mid} } @$msgs], ['m@1'], 'message unavailable in mirror'); @@ -138,8 +138,9 @@ my $import_index_incremental = sub { 'no Xapian shard directories for v2 basic'); } if ($level ne 'basic') { - ($nr, $msgs) = $ro_mirror->search->reopen->query('m:m@2'); - is($nr, 0, "v$v m\@2 gone from Xapian in mirror on $level"); + $msgs = $ro_mirror->search->reopen->query('m:m@2'); + is(scalar(@$msgs), 0, + "v$v m\@2 gone from Xapian in mirror on $level"); } # add another message to master and have the mirror diff --git a/t/v2writable.t b/t/v2writable.t index 2bd7a400..9e4547ba 100644 --- a/t/v2writable.t +++ b/t/v2writable.t @@ -120,7 +120,9 @@ if ('ensure git configs are correct') { $mime->header_set('References', ''); ok($im->add($mime), 'message with multiple Message-ID'); $im->done; - my ($total, undef) = $ibx->over->recent; + my $total = $ibx->over->dbh->selectrow_array(<<''); +SELECT COUNT(*) FROM over WHERE num > 0 + is($ibx->mm->num_highwater, $total, 'got expected highwater value'); my $srch = $ibx->search; my $mset1 = $srch->reopen->query('m:abcde@1', { mset => 1 }); diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index f5b8e932..59ec247e 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -50,7 +50,7 @@ ok($ibx, 'found inbox by name'); my $srch = $ibx->search; PublicInbox::WatchMaildir->new($config)->scan('full'); -my ($total, undef) = $srch->reopen->query(''); +my $total = scalar @{$srch->reopen->query('')}; is($total, 1, 'got one revision'); # my $git = PublicInbox::Git->new("$inboxdir/git/0.git"); @@ -70,7 +70,7 @@ my $write_spam = sub { $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::WatchMaildir->new($config)->scan('full'); -is(($srch->reopen->query(''))[0], 0, 'deleted file'); +is_deeply($srch->reopen->query(''), [], 'deleted file'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing @@ -81,16 +81,16 @@ the body of a message to majordomo\@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 1, 'got one file back'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 1, 'got one file back'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); is(unlink(glob("$maildir/new/*")), 1, 'unlinked spam'); $write_spam->(); PublicInbox::WatchMaildir->new($config)->scan('full'); - ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 0, 'inbox is empty again'); + $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 0, 'inbox is empty again'); is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } @@ -105,8 +105,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); local $SIG{__WARN__} = sub {}; # quiet spam check warning PublicInbox::WatchMaildir->new($config)->scan('full'); } - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 0, 'inbox is still empty'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 0, 'inbox is still empty'); is(unlink(glob("$maildir/new/*")), 1); } @@ -118,8 +118,8 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::Emergency->new($maildir)->prepare(\$msg); $config->{'publicinboxwatch.spamcheck'} = 'spamc'; PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query(''); - is($nr, 1, 'inbox has one mail after spamc OK-ed a message'); + my $msgs = $srch->reopen->query(''); + is(scalar(@$msgs), 1, 'inbox has one mail after spamc OK-ed a message'); my $mref = $ibx->msg_by_smsg($msgs->[0]); like($$mref, qr/something\n\z/s, 'message scrubbed on import'); delete $config->{'publicinboxwatch.spamcheck'}; @@ -131,11 +131,11 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); $msg = do { local $/; <$fh> }; PublicInbox::Emergency->new($maildir)->prepare(\$msg); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($nr, $msgs) = $srch->reopen->query('dfpost:6e006fd7'); - is($nr, 1, 'diff postimage found'); + my $msgs = $srch->reopen->query('dfpost:6e006fd7'); + is(scalar(@$msgs), 1, 'diff postimage found'); my $post = $msgs->[0]; - ($nr, $msgs) = $srch->query('dfpre:090d998b6c2c'); - is($nr, 1, 'diff preimage found'); + $msgs = $srch->query('dfpre:090d998b6c2c'); + is(scalar(@$msgs), 1, 'diff preimage found'); is($post->{blob}, $msgs->[0]->{blob}, 'same message'); } @@ -162,7 +162,7 @@ both EOF PublicInbox::Emergency->new($maildir)->prepare(\$both); PublicInbox::WatchMaildir->new($config)->scan('full'); - my ($total, $msgs) = $srch->reopen->query('m:both@b.com'); + my $msgs = $srch->reopen->query('m:both@b.com'); my $v1 = $config->lookup_name('v1'); my $msg = $v1->git->cat_file($msgs->[0]->{blob}); is($both, $$msg, 'got original message back from v1');