* [PATCH] search: fix altid search with XapHelper process
@ 2024-05-05 23:35 Eric Wong
2024-05-06 6:08 ` Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2024-05-05 23:35 UTC (permalink / raw)
To: meta
External Xapian helper processes need to support non-standard
QueryParser prefixes. The only way to do this is to specify
these prefixes in every `mset' request since we have no idea
if the XH worker servicing the request has initialized the
extra prefixes yet.
---
lib/PublicInbox/Search.pm | 20 +++++++++++++++-----
lib/PublicInbox/XapHelper.pm | 14 ++++++++++++++
lib/PublicInbox/xap_helper.h | 35 +++++++++++++++++++++++++++++++++++
t/www_altid.t | 24 +++++++++++++++---------
4 files changed, 79 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbdb48a3..e5c5d6ab 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -92,6 +92,7 @@ our @XH_SPEC = (
'K=i', # timeout kill after i seconds
'O=s', # eidx_key
'T=i', # threadid
+ 'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX"
);
sub load_xapian () {
@@ -435,8 +436,8 @@ sub xhc_start_maybe (@) {
$xhc;
}
-sub xh_opt ($) {
- my ($opt) = @_;
+sub xh_opt ($$) {
+ my ($self, $opt) = @_;
my $lim = $opt->{limit} || 50;
my @ret;
push @ret, '-o', $opt->{offset} if $opt->{offset};
@@ -458,7 +459,16 @@ sub xh_opt ($) {
push @ret, '-t' if $opt->{threads};
push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
- @ret;
+ my $apfx = $self->{-alt_pfx} //= do {
+ my @tmp;
+ for (grep /\Aserial:/, @{$self->{altid} // []}) {
+ my (undef, $pfx) = split /:/, $_;
+ push @tmp, '-Q', "$pfx=X\U$pfx";
+ }
+ # TODO: arbitrary header indexing goes here
+ \@tmp;
+ };
+ (@ret, @$apfx);
}
# returns a true value if actually handled asynchronously,
@@ -467,7 +477,7 @@ sub async_mset {
my ($self, $qry_str, $opt, $cb, @args) = @_;
if ($XHC) { # unconditionally retrieving pct + rank for now
xdb($self); # populate {nshards}
- my @margs = ($self->xh_args, xh_opt($opt));
+ my @margs = ($self->xh_args, xh_opt($self, $opt));
my $ret = eval {
my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
@@ -630,7 +640,7 @@ EOM
$ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
}
}
- # TODO: altid support
+ # altid support is handled in xh_opt and srch_init_extra in XH
for my $name (sort keys %prob_prefix) {
for (split(/ /, $prob_prefix{$name})) {
$ret .= qq{\tqp->add_prefix("$name", "$_");\n}
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 2e20660e..099bc4fe 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -172,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP
}
}
+sub srch_init_extra ($) {
+ my ($req) = @_;
+ my $qp = $req->{srch}->{qp};
+ for (@{$req->{Q}}) {
+ my ($upfx, $m, $xpfx) = split /([:=])/;
+ $xpfx // die "E: bad -Q $_";
+ $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix';
+ $qp->$m($upfx, $xpfx);
+ }
+ $req->{srch}->{qp_extra_done} = 1;
+}
+
sub dispatch {
my ($req, $cmd, @argv) = @_;
my $fn = $req->can("cmd_$cmd") or return;
@@ -195,6 +207,8 @@ sub dispatch {
$new->{qp} = $new->qparse_new;
$new;
};
+ $req->{Q} && !$req->{srch}->{qp_extra_done} and
+ srch_init_extra $req;
my $timeo = $req->{K};
alarm($timeo) if $timeo;
$fn->($req, @argv);
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3df3ce91..4e809fdd 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -114,6 +114,7 @@ enum exc_iter {
struct srch {
int paths_len; // int for comparisons
unsigned qp_flags;
+ bool qp_extra_done;
Xapian::Database *db;
Xapian::QueryParser *qp;
char paths[]; // $shard_path0\0$shard_path1\0...
@@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *);
struct req { // argv and pfxv point into global rbuf
char *argv[MY_ARG_MAX];
char *pfxv[MY_ARG_MAX]; // -A <prefix>
+ char *qpfxv[MY_ARG_MAX]; // -A <prefix>
size_t *lenv; // -A <prefix>LENGTH
struct srch *srch;
char *Pgit_dir;
@@ -139,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf
long sort_col; // value column, negative means BoolWeight
int argc;
int pfxc;
+ int qpfxc;
FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
bool has_input; // fp[0] is bidirectional
bool collapse_threads;
@@ -584,6 +587,31 @@ static bool srch_init(struct req *req)
return true;
}
+// setup query parser for altid and arbitrary headers
+static void srch_init_extra(struct req *req)
+{
+ const char *XPFX;
+ for (int i = 0; i < req->qpfxc; i++) {
+ size_t len = strlen(req->qpfxv[i]);
+ char *c = (char *)memchr(req->qpfxv[i], '=', len);
+
+ if (c) { // it's boolean gmane:XGMANE
+ XPFX = c + 1;
+ *c = 0;
+ req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX);
+ continue;
+ }
+ // maybe it's a non-boolean prefix "blob:XBLOBID"
+ c = (char *)memchr(req->qpfxv[i], ':', len);
+ if (!c)
+ errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]);
+ XPFX = c + 1;
+ *c = 0;
+ req->srch->qp->add_prefix(req->qpfxv[i], XPFX);
+ }
+ req->srch->qp_extra_done = true;
+}
+
static void free_srch(void *p) // tdestroy
{
struct srch *srch = (struct srch *)p;
@@ -665,12 +693,17 @@ static void dispatch(struct req *req)
if (*end || req->threadid == ULLONG_MAX)
ABORT("-T %s", optarg);
break;
+ case 'Q':
+ req->qpfxv[req->qpfxc++] = optarg;
+ if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+ break;
default: ABORT("bad switch `-%c'", c);
}
}
ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch
kbuf.srch->db = NULL;
kbuf.srch->qp = NULL;
+ kbuf.srch->qp_extra_done = false;
kbuf.srch->paths_len = size - offsetof(struct srch, paths);
if (kbuf.srch->paths_len <= 0)
ABORT("no -d args");
@@ -687,6 +720,8 @@ static void dispatch(struct req *req)
free_srch(kbuf.srch);
goto cmd_err; // srch_init already warned
}
+ if (req->qpfxc && !req->srch->qp_extra_done)
+ srch_init_extra(req);
if (req->timeout_sec)
alarm(req->timeout_sec > UINT_MAX ?
UINT_MAX : (unsigned)req->timeout_sec);
diff --git a/t/www_altid.t b/t/www_altid.t
index de1e6ed6..7ad4a1d2 100644
--- a/t/www_altid.t
+++ b/t/www_altid.t
@@ -6,7 +6,7 @@ use PublicInbox::Config;
use PublicInbox::Spawn qw(spawn);
require_cmd('sqlite3');
require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape
- Plack::Builder IO::Uncompress::Gunzip));
+ Plack::Builder IO::Uncompress::Gunzip Xapian));
use_ok($_) for qw(Plack::Test HTTP::Request::Common);
require_ok 'PublicInbox::Msgmap';
require_ok 'PublicInbox::AltId';
@@ -14,17 +14,13 @@ require_ok 'PublicInbox::WWW';
my ($tmpdir, $for_destroy) = tmpdir();
my $aid = 'xyz';
my $cfgpath;
-my $ibx = create_inbox 'test', indexlevel => 'basic', sub {
+my $spec = "serial:$aid:file=blah.sqlite3";
+my $ibx = create_inbox 'test-altid', indexlevel => 'medium',
+ altid => [ $spec ], sub {
my ($im, $ibx) = @_;
- $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
-From: a@example.com
-Message-Id: <a@example.com>
-
-EOF
- # $im->done;
- my $spec = "serial:$aid:file=blah.sqlite3";
my $altid = PublicInbox::AltId->new($ibx, $spec, 1);
$altid->mm_alt->mid_set(1, 'a@example.com');
+ undef $altid;
$cfgpath = "$ibx->{inboxdir}/cfg";
open my $fh, '>', $cfgpath or BAIL_OUT "open $cfgpath: $!";
print $fh <<EOF or BAIL_OUT $!;
@@ -35,6 +31,11 @@ EOF
url = http://example.com/test
EOF
close $fh or BAIL_OUT $!;
+ $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
+From: a@example.com
+Message-Id: <a@example.com>
+
+EOF
};
$cfgpath //= "$ibx->{inboxdir}/cfg";
my $cfg = PublicInbox::Config->new($cfgpath);
@@ -56,6 +57,11 @@ my $client = sub {
is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid');
$mm_cmp = undef;
unlink $cmpfile or die;
+
+ $res = $cb->(GET('/test/?q=xyz:1'));
+ is $res->code, 200, 'altid search hit';
+ $res = $cb->(GET('/test/?q=xyz:10'));
+ is $res->code, 404, 'altid search miss';
};
test_psgi(sub { $www->call(@_) }, $client);
SKIP: {
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] search: fix altid search with XapHelper process
2024-05-05 23:35 [PATCH] search: fix altid search with XapHelper process Eric Wong
@ 2024-05-06 6:08 ` Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2024-05-06 6:08 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> +++ b/lib/PublicInbox/xap_helper.h
> @@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *);
> struct req { // argv and pfxv point into global rbuf
> char *argv[MY_ARG_MAX];
> char *pfxv[MY_ARG_MAX]; // -A <prefix>
> + char *qpfxv[MY_ARG_MAX]; // -A <prefix>
That comment is wrong.
> +// setup query parser for altid and arbitrary headers
> +static void srch_init_extra(struct req *req)
> +{
> + const char *XPFX;
> + for (int i = 0; i < req->qpfxc; i++) {
> + size_t len = strlen(req->qpfxv[i]);
> + char *c = (char *)memchr(req->qpfxv[i], '=', len);
> +
> + if (c) { // it's boolean gmane:XGMANE
And so is that comment. Will squash this in before pushing:
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 4e809fdd..bdc1c5b1 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -127,7 +127,7 @@ typedef bool (*cmd)(struct req *);
struct req { // argv and pfxv point into global rbuf
char *argv[MY_ARG_MAX];
char *pfxv[MY_ARG_MAX]; // -A <prefix>
- char *qpfxv[MY_ARG_MAX]; // -A <prefix>
+ char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX>
size_t *lenv; // -A <prefix>LENGTH
struct srch *srch;
char *Pgit_dir;
@@ -595,7 +595,7 @@ static void srch_init_extra(struct req *req)
size_t len = strlen(req->qpfxv[i]);
char *c = (char *)memchr(req->qpfxv[i], '=', len);
- if (c) { // it's boolean gmane:XGMANE
+ if (c) { // it's boolean "gmane=XGMANE"
XPFX = c + 1;
*c = 0;
req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX);
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-05-06 6:16 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-05 23:35 [PATCH] search: fix altid search with XapHelper process Eric Wong
2024-05-06 6:08 ` Eric Wong
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).