From: Eric Wong <e@80x24.org>
To: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Cc: meta@public-inbox.org
Subject: Re: Cheap way to check for new messages in a thread
Date: Thu, 30 Mar 2023 11:29:51 +0000 [thread overview]
Message-ID: <20230330112951.M493025@dcvr> (raw)
In-Reply-To: <20230329212558.M622984@dcvr>
Eric Wong <e@80x24.org> wrote:
> Konstantin Ryabitsev <konstantin@linuxfoundation.org> wrote:
> > However, if you do want to add ability to cheaply do a "give me just the
> > newest messages in this thread since this datetime", that would be great for
> > my needs. :)
>
> Per-thread search is something I've wanted for a while, anyways,
> so I think I'll do /$MSGID/?q= in between ongoing work for
This implements the mbox.gz retrieval. I didn't want to deal
with HTML or figure out how to expose more <form> elements
yet, but I figure mbox.gz is the most important.
Now deployed on 80x24.org/lore:
MSGID=20230327080502.GA570847@ziqianlu-desk2
curl -d '' -sSf \
https://80x24.org/lore/all/"$MSGID/?x=m&q=rt:2023-03-29.." | \
zcat | grep -i ^Message-ID:
shows the expected messages.
-----------8<-----------
Subject: [PATCH] www: support POST /$INBOX/$MSGID/?x=m&q=
This allows filtering the contents of any existing thread using
a search query. It uses the existing THREADID column in Xapian
so we can internally add a Xapian OP_FILTER to the results.
This new functionality is orthogonal to the existing `t=1'
parameter which gives mairix-style thread expansion. It doesn't
make sense to use `t=1' with this functionality, but it's not
disallowed, either.
The indentation change in Over->next_by_mid is to ensure
DBI->prepare_cached can share across both ->next_by_mid
and ->mid2tid.
I also noticed the existing regex for `POST /$INBOX/?x=m&q=' was
allowing extra characters. With an added \z, it's now as strict
as was originally intended, and AFAIK nothing was generating invalid
URLs for it.
Reported-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
Link: https://public-inbox.org/meta/aaniyhk7wfm4e6m5mbukcrhevzoc6ftctyrfwvmz4fkykwwtlj@mverfng6ytas/T/
---
lib/PublicInbox/Mbox.pm | 5 ++++
lib/PublicInbox/Over.pm | 24 ++++++++++++++++++-
lib/PublicInbox/Search.pm | 6 +++++
lib/PublicInbox/WWW.pm | 4 +++-
t/psgi_v2.t | 50 ++++++++++++++++++++++++++++++++++-----
5 files changed, 81 insertions(+), 8 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 18db9d38..e1abf7ec 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -229,6 +229,11 @@ sub mbox_all {
return PublicInbox::WWW::need($ctx, 'Overview');
my $qopts = $ctx->{qopts} = { relevance => -2 }; # ORDER BY docid DESC
+
+ # {threadid} limits results to a given thread
+ # {threads} collapses results from messages in the same thread,
+ # allowing us to use ->expand_thread w/o duplicates in our own code
+ $qopts->{threadid} = $over->mid2tid($ctx->{mid}) if defined($ctx->{mid});
$qopts->{threads} = 1 if $q->{t};
$srch->query_approxidate($ctx->{ibx}->git, $q_string);
my $mset = $srch->mset($q_string, $qopts);
diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm
index 271e2246..6ba27118 100644
--- a/lib/PublicInbox/Over.pm
+++ b/lib/PublicInbox/Over.pm
@@ -283,13 +283,35 @@ SELECT eidx_key FROM inboxes WHERE ibx_id = ?
$rows;
}
+sub mid2tid {
+ my ($self, $mid) = @_;
+ my $dbh = dbh($self);
+
+ my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT id FROM msgid WHERE mid = ? LIMIT 1
+
+ $sth->execute($mid);
+ my $id = $sth->fetchrow_array or return;
+ $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT num FROM id2num WHERE id = ? AND num > ?
+ORDER BY num ASC LIMIT 1
+
+ $sth->execute($id, 0);
+ my $num = $sth->fetchrow_array or return;
+ $sth = $dbh->prepare(<<'');
+SELECT tid FROM over WHERE num = ? LIMIT 1
+
+ $sth->execute($num);
+ $sth->fetchrow_array;
+}
+
sub next_by_mid {
my ($self, $mid, $id, $prev) = @_;
my $dbh = dbh($self);
unless (defined $$id) {
my $sth = $dbh->prepare_cached(<<'', undef, 1);
- SELECT id FROM msgid WHERE mid = ? LIMIT 1
+SELECT id FROM msgid WHERE mid = ? LIMIT 1
$sth->execute($mid);
$$id = $sth->fetchrow_array;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 5133a3b7..6c3d9f93 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -386,6 +386,12 @@ sub mset {
sortable_serialise($uid_range->[1]));
$query = $X{Query}->new(OP_FILTER(), $query, $range);
}
+ if (defined(my $tid = $opt->{threadid})) {
+ $tid = sortable_serialise($tid);
+ $query = $X{Query}->new(OP_FILTER(), $query,
+ $X{Query}->new(OP_VALUE_RANGE(), THREADID, $tid, $tid));
+ }
+
my $xdb = xdb($self);
my $enq = $X{Enquire}->new($xdb);
$enq->set_query($query);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 9ffcb879..a8f1ad17 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -68,7 +68,9 @@ sub call {
my ($idx, $fn) = ($3, $4);
return invalid_inbox_mid($ctx, $1, $2) ||
get_attach($ctx, $idx, $fn);
- } elsif ($path_info =~ m!$INBOX_RE/!o) {
+ } elsif ($path_info =~ m!$INBOX_RE/$MID_RE/\z!o) {
+ return invalid_inbox_mid($ctx, $1, $2) || mbox_results($ctx);
+ } elsif ($path_info =~ m!$INBOX_RE/\z!o) {
return invalid_inbox($ctx, $1) || mbox_results($ctx);
}
}
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 5b197a9f..0a77adfb 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -4,6 +4,7 @@
use strict;
use v5.10.1;
use PublicInbox::TestCommon;
+use IO::Uncompress::Gunzip qw(gunzip);
require_git(2.6);
use PublicInbox::Eml;
use PublicInbox::Config;
@@ -76,6 +77,30 @@ $new_mid //= do {
local $/;
<$fh>;
};
+
+my $m2t = create_inbox 'mid2tid-1', version => 2, indexlevel => 'medium', sub {
+ my ($im, $ibx) = @_;
+ for my $n (1..3) {
+ $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
+Date: Fri, 02 Oct 1993 00:0$n:00 +0000
+Message-ID: <t\@$n>
+Subject: tid $n
+From: x\@example.com
+References: <a-mid\@b>
+
+$n
+EOM
+ $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
+Date: Fri, 02 Oct 1993 00:0$n:00 +0000
+Message-ID: <ut\@$n>
+Subject: unrelated tid $n
+From: x\@example.com
+References: <b-mid\@b>
+
+EOM
+ }
+};
+
my $cfgpath = "$ibx->{inboxdir}/pi_config";
{
open my $fh, '>', $cfgpath or BAIL_OUT $!;
@@ -86,6 +111,9 @@ my $cfgpath = "$ibx->{inboxdir}/pi_config";
[publicinbox "dup"]
inboxdir = $dibx->{inboxdir}
address = $dibx->{-primary_address}
+[publicinbox "m2t"]
+ inboxdir = $m2t->{inboxdir}
+ address = $m2t->{-primary_address}
EOF
close $fh or BAIL_OUT;
}
@@ -178,20 +206,18 @@ my $client1 = sub {
$cfg->each_inbox(sub { $_[0]->search->reopen });
SKIP: {
- eval { require IO::Uncompress::Gunzip };
- skip 'IO::Uncompress::Gunzip missing', 6 if $@;
my ($in, $out, $status);
my $req = GET('/v2test/a-mid@b/raw');
$req->header('Accept-Encoding' => 'gzip');
$res = $cb->($req);
is($res->header('Content-Encoding'), 'gzip', 'gzip encoding');
$in = $res->content;
- IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ gunzip(\$in => \$out);
is($out, $raw, 'gzip response matches');
$res = $cb->(GET('/v2test/a-mid@b/t.mbox.gz'));
$in = $res->content;
- $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ $status = gunzip(\$in => \$out);
unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
@@ -202,7 +228,7 @@ my $client1 = sub {
# search interface
$res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
$in = $res->content;
- $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ $status = gunzip(\$in => \$out);
unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in mbox POST');
like($out, qr/^hello world!$/m, 'got second in mbox POST');
@@ -213,7 +239,7 @@ my $client1 = sub {
# all.mbox.gz interface
$res = $cb->(GET('/v2test/all.mbox.gz'));
$in = $res->content;
- $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ $status = gunzip(\$in => \$out);
unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
like($out, qr/^hello world$/m, 'got first in all.mbox');
like($out, qr/^hello world!$/m, 'got second in all.mbox');
@@ -335,6 +361,18 @@ my $client3 = sub {
local $SIG{__WARN__} = sub { push @warn, @_ };
$res = $cb->(GET('/v2test/?t=1970'.'01'.'01'));
is_deeply(\@warn, [], 'no warnings on YYYYMMDD only');
+
+ $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000300..&x=m"));
+ is($res->code, 200, 'got 200 on mid2tid query');
+ gunzip(\(my $in = $res->content) => \(my $out));
+ my @m = ($out =~ m!^Message-ID: <([^>]+)>\n!gms);
+ is_deeply(\@m, ['t@3'], 'only got latest result from query');
+
+ $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000400..&x=m"));
+ is($res->code, 404, '404 on out-of-range mid2tid query');
+
+ $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m"));
+ is($res->code, 404, '404 on cross-thread search');
};
test_psgi(sub { $www->call(@_) }, $client3);
test_httpd($env, $client3, 4);
next prev parent reply other threads:[~2023-03-30 11:29 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-27 15:08 Cheap way to check for new messages in a thread Konstantin Ryabitsev
2023-03-27 19:10 ` Eric Wong
2023-03-27 20:47 ` Konstantin Ryabitsev
2023-03-27 21:38 ` Eric Wong
2023-03-28 14:04 ` Konstantin Ryabitsev
2023-03-28 19:45 ` Eric Wong
2023-03-28 20:00 ` Konstantin Ryabitsev
2023-03-28 22:08 ` Eric Wong
2023-03-28 23:30 ` Konstantin Ryabitsev
2023-03-29 21:25 ` Eric Wong
2023-03-30 11:29 ` Eric Wong [this message]
2023-03-30 16:45 ` Konstantin Ryabitsev
2023-03-31 1:40 ` Eric Wong
2023-04-11 11:27 ` Eric Wong
2023-06-16 19:11 ` Konstantin Ryabitsev
2023-06-16 23:13 ` [PATCH] www: use correct threadid for per-thread search Eric Wong
2023-06-21 17:11 ` Konstantin Ryabitsev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230330112951.M493025@dcvr \
--to=e@80x24.org \
--cc=konstantin@linuxfoundation.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).