From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DB6CB1FC0D for ; Sat, 28 Nov 2020 05:09:16 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/5] nntp: speed up mid_lookup() using ->ALL extindex Date: Sat, 28 Nov 2020 05:09:13 +0000 Message-Id: <20201128050916.5586-3-e@80x24.org> In-Reply-To: <20201128050916.5586-1-e@80x24.org> References: <20201128050916.5586-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We can reuse "xref3" information in extindex to quickly find messages matching a given Message-ID across hundreds or thousands of newsgroups with a few SQL statements. "XHDR Xref $MESSAGE_ID" is around 40% faster, on top of previous speedups. 
--- lib/PublicInbox/NNTP.pm | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index cc6534b9..7b3b1ffe 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -730,10 +730,36 @@ sub mid_lookup ($$) { my $n = $self_ng->mm->num_for($mid); return ($self_ng, $n) if defined $n; } - foreach my $ng (values %{$self->{nntpd}->{groups}}) { - next if defined $self_ng && $ng eq $self_ng; - my $n = $ng->mm->num_for($mid); - return ($ng, $n) if defined $n; + my $pi_cfg = $self->{nntpd}->{pi_config}; + if (my $ALL = $pi_cfg->ALL) { + my ($id, $prev); + while (my $smsg = $ALL->over->next_by_mid($mid, \$id, \$prev)) { + my $xr3 = $ALL->over->get_xref3($smsg->{num}); + if (my @x = grep(/:$smsg->{blob}\z/, @$xr3)) { + my ($ngname, $xnum) = split(/:/, $x[0]); + my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname}; + return ($ibx, $xnum) if $ibx; + # fall through to trying all xref3s + } else { + warn < ($smsg->{blob}) in $ALL->{topdir}, -extindex bug? +EOF + } + # try all xref3s + for my $x (@$xr3) { + my ($ngname, $xnum) = split(/:/, $x); + my $ibx = $pi_cfg->{-by_newsgroup}->{$ngname}; + return ($ibx, $xnum) if $ibx; + warn "W: `$ngname' does not exist for #$xnum\n"; + } + } + # no warning here, $mid is just invalid + } else { # slow path for non-ALL users + foreach my $ibx (values %{$self->{nntpd}->{groups}}) { + next if defined $self_ng && $ibx eq $self_ng; + my $n = $ibx->mm->num_for($mid); + return ($ibx, $n) if defined $n; + } } (undef, undef); }