From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7050A1F542; Fri, 16 Jun 2023 23:13:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1686957181; bh=Y3WxDwce+dwREHnmtsj42DbSCRbIiMFzrgnJNNJZyGM=; h=Date:From:To:Cc:Subject:References:In-Reply-To:From; b=Q2+Y346iHT3+CyByynsDJknkTj60yTqLaxpBaLkMFX6p7j3XgygGkD8E14odMTNyW 0t3K/Y2AsMPnlRxQUD1FbvBkUWlo9F85emFKKl/K6SX0QwwooC3UImrzIeyJbWTohs pnoHEppu4ZSJP0C17D5l74a4bQbYyQZ8w63FK+U4= Date: Fri, 16 Jun 2023 23:13:01 +0000 From: Eric Wong To: Konstantin Ryabitsev Cc: meta@public-inbox.org Subject: [PATCH] www: use correct threadid for per-thread search Message-ID: <20230616231301.M394415@dcvr> References: <20230327213849.M743623@dcvr> <20230328194549.M808175@dcvr> <20230328-monsoon-charred-giver-91f26d3024fb@meerkat> <20230328220830.M352242@dcvr> <20230328-oppressed-almighty-61330f9dde22@meerkat> <20230329212558.M622984@dcvr> <20230330112951.M493025@dcvr> <20230616-rudy-comedy-vision-2b9f92@meerkat> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20230616-rudy-comedy-vision-2b9f92@meerkat> List-Id: Konstantin Ryabitsev wrote: > On Thu, Mar 30, 2023 at 11:29:51AM +0000, Eric Wong wrote: > > This implements the mbox.gz retrieval. I didn't want to deal > > with HTML nor figuring out how to expose more
elements, > > yet; but I figure mbox.gz is the most important. > > > > Now deployed on 80x24.org/lore: > > > > MSGID=20230327080502.GA570847@ziqianlu-desk2 > > curl -d '' -sSf \ > > https://80x24.org/lore/all/"$MSGID/?x=m&q=rt:2023-03-29.." | \ > > zcat | grep -i ^Message-ID: > > Eric: > > Reviving this old thread for some clarification. I noticed that this only > works for /all/, but not for individual inboxes. E.g.: > > $ curl -d '' -sSf \ > https://lore.kernel.org/all/"$MSGID/?x=m&q=rt:2023-03-29.." \ > | zgrep -i ^Message-ID: > Message-ID: > > but with /lkml/ I get a 404: > > $ curl -d '' -sSf \ > https://lore.kernel.org/lkml/"$MSGID/?x=m&q=rt:2023-03-29.." \ > | zgrep -i ^Message-ID: > curl: (22) The requested URL returned error: 404 > > Is that intentionally restricted to just extindex? It's a bug, fix below and deployed to https://80x24.org/lore/ ---------8<--------- Subject: [PATCH] www: use correct threadid for per-thread search For individual public-inboxes relying on extindex for per-inbox search, we must use the threadid from the extindex over.sqlite3 rather than the per-inbox over.sqlite3 file. Reported-by: Konstantin Ryabitsev Link: https://public-inbox.org/meta/20230616-rudy-comedy-vision-2b9f92@meerkat/ --- lib/PublicInbox/Mbox.pm | 10 +++++++--- t/extindex-psgi.t | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index e1abf7ec..bf61bb0e 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -225,15 +225,19 @@ sub mbox_all { return mbox_all_ids($ctx) if $q_string !~ /\S/; my $srch = $ctx->{ibx}->isrch or return PublicInbox::WWW::need($ctx, 'Search'); - my $over = $ctx->{ibx}->over or - return PublicInbox::WWW::need($ctx, 'Overview'); my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC # {threadid} limits results to a given thread # {threads} collapses results from messages in the same thread, # allowing us to use ->expand_thread w/o duplicates in our own code - $qopts->{threadid} = $over->mid2tid($ctx->{mid}) if defined($ctx->{mid}); + if (defined($ctx->{mid})) { + my $over = ($ctx->{ibx}->{isrch} ? + $ctx->{ibx}->{isrch}->{es}->over : + $ctx->{ibx}->over) or + return PublicInbox::WWW::need($ctx, 'Overview'); + $qopts->{threadid} = $over->mid2tid($ctx->{mid}); + } $qopts->{threads} = 1 if $q->{t}; $srch->query_approxidate($ctx->{ibx}->git, $q_string); my $mset = $srch->mset($q_string, $qopts); diff --git a/t/extindex-psgi.t b/t/extindex-psgi.t index 98dc2e48..f10ffbb6 100644 --- a/t/extindex-psgi.t +++ b/t/extindex-psgi.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; use v5.10.1; @@ -21,7 +21,28 @@ mkdir "$home/.public-inbox" or BAIL_OUT $!; my $pi_config = "$home/.public-inbox/config"; cp($cfg_path, $pi_config) or BAIL_OUT; my $env = { HOME => $home }; -run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT; +my $m2t = create_inbox 'mid2tid', version => 2, indexlevel => 'basic', sub { + my ($im, $ibx) = @_; + for my $n (1..3) { + $im->add(PublicInbox::Eml->new(< +Subject: tid $n +From: x\@example.com +References: + +$n +EOM + $im->add(PublicInbox::Eml->new(< +Subject: unrelated tid $n +From: x\@example.com +References: + +EOM + } +}; { open my $cfgfh, '>>', $pi_config or BAIL_OUT; $cfgfh->autoflush(1); @@ -32,8 +53,14 @@ run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT; [publicinbox] wwwlisting = all grokManifest = all +[publicinbox "m2t"] + inboxdir = $m2t->{inboxdir} + address = $m2t->{-primary_address} EOM + close $cfgfh or xbail "close: $!"; } + +run_script([qw(-extindex --all), "$tmpdir/eidx"], $env) or BAIL_OUT; my $www = PublicInbox::WWW->new(PublicInbox::Config->new($pi_config)); my $client = sub { my ($cb) = @_; @@ -83,6 +110,14 @@ my $client = sub { 't2 manifest'); is_deeply([ sort keys %{$m->{'/t1'}} ], [ '/t1' ], 't2 manifest'); + + # ensure ibx->{isrch}->{es}->over is used instead of ibx->over: + $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000259..&x=m")); + is($res->code, 200, 'hit on mid2tid query'); + $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000400..&x=m")); + is($res->code, 404, '404 on out-of-range mid2tid query'); + $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m")); + is($res->code, 404, '404 on cross-thread search'); }; test_psgi(sub { $www->call(@_) }, $client); %$env = (%$env, TMPDIR => $tmpdir, PI_CONFIG => $pi_config);