From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8E3241FB0D for ; Thu, 20 Aug 2020 20:25:01 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 19/23] mbox: avoid Xapian docdata in search results Date: Thu, 20 Aug 2020 20:24:53 +0000 Message-Id: <20200820202457.21042-20-e@yhbt.net> In-Reply-To: <20200820202457.21042-1-e@yhbt.net> References: <20200820202457.21042-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Another place where we can reduce kernel page cache overhead by hitting over.sqlite3 instead of docdata.glass. --- lib/PublicInbox/Mbox.pm | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index a83c0356..0fa9a38d 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -10,6 +10,7 @@ use PublicInbox::MID qw/mid_escape/; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Smsg; use PublicInbox::Eml; +use PublicInbox::Search qw(mdocid); # called by PSGI server as body response # this gets called twice for every message, once to return the header, @@ -205,20 +206,19 @@ sub mbox_all_ids { sub results_cb { my ($ctx) = @_; - my $srch = $ctx->{-inbox}->search(undef, $ctx) or return; - my $mset = $ctx->{mset}; + my $over = $ctx->{-inbox}->over or return; while (1) { - while (my $mi = (($mset->items)[$ctx->{iter}++])) { - my $smsg = PublicInbox::Smsg::from_mitem($mi, - $srch) or next; + while (defined(my $num = shift(@{$ctx->{ids}}))) { + my $smsg = $over->get_art($num) or next; return $smsg; } # refill result set - $mset = $ctx->{mset} = $srch->query($ctx->{query}, - $ctx->{qopts}); + my $srch = $ctx->{-inbox}->search(undef, $ctx) or return; + my $mset = $srch->query($ctx->{query}, $ctx->{qopts}); my $size = $mset->size or return; $ctx->{qopts}->{offset} += $size; - $ctx->{iter} = 0; + my $nshard = $srch->{nshard} // 1; + $ctx->{ids} = [ map { mdocid($nshard, $_) } $mset->items ]; } } @@ -226,15 +226,16 @@ sub mbox_all { my ($ctx, $query) = @_; return mbox_all_ids($ctx) if $query eq ''; - my $qopts = $ctx->{qopts} = { mset => 2 }; + my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid my $srch = $ctx->{-inbox}->search or return PublicInbox::WWW::need($ctx, 'Search'); - my $mset = $ctx->{mset} = $srch->query($query, $qopts); + my $mset = $srch->query($query, $qopts); $qopts->{offset} = $mset->size or return [404, [qw(Content-Type text/plain)], ["No results found\n"]]; - $ctx->{iter} = 0; $ctx->{query} = $query; + my $nshard = $srch->{nshard} // 1; + $ctx->{ids} = [ map { mdocid($nshard, $_) } $mset->items ]; require PublicInbox::MboxGz; PublicInbox::MboxGz::mbox_gz($ctx, \&results_cb, 'results-'.$query); }