From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1946F1F44D for ; Sun, 5 May 2024 23:35:11 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1714952111; bh=3iaVDjilAxesYSwie5d8VqkJCWQreYYjQ83F+223wRE=; h=From:To:Subject:Date:From; b=CqB/EX8Ch43O7ov+HGAKOtVHUdrUNnrC7knDeyzCuO/9BruAyXLDt7BQrEx31fItS 6QTx1LKVLN4QVs5/i59QyxGIu9oCf88F4PLC8yYVJ2Karnqj0vs5Ug4n7029+2+QOX 26fMwGUWaWKy+UaFkDEUIxI8dSiNtTqGmeM7MTj0= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] search: fix altid search with XapHelper process Date: Sun, 5 May 2024 23:35:10 +0000 Message-ID: <20240505233510.1896122-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: External Xapian helper processes need to support non-standard QueryParser prefixes. The only way to do this is to specify these prefixes in every `mset' request since we have no idea if the XH worker servicing the request has initialized the extra prefixes, yet. 
--- lib/PublicInbox/Search.pm | 20 +++++++++++++++----- lib/PublicInbox/XapHelper.pm | 14 ++++++++++++++ lib/PublicInbox/xap_helper.h | 35 +++++++++++++++++++++++++++++++++++ t/www_altid.t | 24 +++++++++++++++--------- 4 files changed, 79 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index fbdb48a3..e5c5d6ab 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -92,6 +92,7 @@ our @XH_SPEC = ( 'K=i', # timeout kill after i seconds 'O=s', # eidx_key 'T=i', # threadid + 'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX" ); sub load_xapian () { @@ -435,8 +436,8 @@ sub xhc_start_maybe (@) { $xhc; } -sub xh_opt ($) { - my ($opt) = @_; +sub xh_opt ($$) { + my ($self, $opt) = @_; my $lim = $opt->{limit} || 50; my @ret; push @ret, '-o', $opt->{offset} if $opt->{offset}; @@ -458,7 +459,16 @@ sub xh_opt ($) { push @ret, '-t' if $opt->{threads}; push @ret, '-T', $opt->{threadid} if defined $opt->{threadid}; push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key}; - @ret; + my $apfx = $self->{-alt_pfx} //= do { + my @tmp; + for (grep /\Aserial:/, @{$self->{altid} // []}) { + my (undef, $pfx) = split /:/, $_; + push @tmp, '-Q', "$pfx=X\U$pfx"; + } + # TODO: arbitrary header indexing goes here + \@tmp; + }; + (@ret, @$apfx); } # returns a true value if actually handled asynchronously, @@ -467,7 +477,7 @@ sub async_mset { my ($self, $qry_str, $opt, $cb, @args) = @_; if ($XHC) { # unconditionally retrieving pct + rank for now xdb($self); # populate {nshards} - my @margs = ($self->xh_args, xh_opt($opt)); + my @margs = ($self->xh_args, xh_opt($self, $opt)); my $ret = eval { my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str); PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args); @@ -630,7 +640,7 @@ EOM $ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n} } } - # TODO: altid support + # altid support is handled in xh_opt and srch_init_extra in XH for my $name (sort keys %prob_prefix) { for (split(/ 
/, $prob_prefix{$name})) { $ret .= qq{\tqp->add_prefix("$name", "$_");\n} diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm index 2e20660e..099bc4fe 100644 --- a/lib/PublicInbox/XapHelper.pm +++ b/lib/PublicInbox/XapHelper.pm @@ -172,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP } } +sub srch_init_extra ($) { + my ($req) = @_; + my $qp = $req->{srch}->{qp}; + for (@{$req->{Q}}) { + my ($upfx, $m, $xpfx) = split /([:=])/; + $xpfx // die "E: bad -Q $_"; + $m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix'; + $qp->$m($upfx, $xpfx); + } + $req->{srch}->{qp_extra_done} = 1; +} + sub dispatch { my ($req, $cmd, @argv) = @_; my $fn = $req->can("cmd_$cmd") or return; @@ -195,6 +207,8 @@ sub dispatch { $new->{qp} = $new->qparse_new; $new; }; + $req->{Q} && !$req->{srch}->{qp_extra_done} and + srch_init_extra $req; my $timeo = $req->{K}; alarm($timeo) if $timeo; $fn->($req, @argv); diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h index 3df3ce91..4e809fdd 100644 --- a/lib/PublicInbox/xap_helper.h +++ b/lib/PublicInbox/xap_helper.h @@ -114,6 +114,7 @@ enum exc_iter { struct srch { int paths_len; // int for comparisons unsigned qp_flags; + bool qp_extra_done; Xapian::Database *db; Xapian::QueryParser *qp; char paths[]; // $shard_path0\0$shard_path1\0... 
@@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *); struct req { // argv and pfxv point into global rbuf char *argv[MY_ARG_MAX]; char *pfxv[MY_ARG_MAX]; // -A + char *qpfxv[MY_ARG_MAX]; // -Q size_t *lenv; // -A LENGTH struct srch *srch; char *Pgit_dir; @@ -139,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf long sort_col; // value column, negative means BoolWeight int argc; int pfxc; + int qpfxc; FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional) bool has_input; // fp[0] is bidirectional bool collapse_threads; @@ -584,6 +587,31 @@ static bool srch_init(struct req *req) return true; } +// setup query parser for altid and arbitrary headers +static void srch_init_extra(struct req *req) +{ + const char *XPFX; + for (int i = 0; i < req->qpfxc; i++) { + size_t len = strlen(req->qpfxv[i]); + char *c = (char *)memchr(req->qpfxv[i], '=', len); + + if (c) { // it's boolean "gmane=XGMANE" + XPFX = c + 1; + *c = 0; + req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX); + continue; + } + // maybe it's a non-boolean prefix "blob:XBLOBID" + c = (char *)memchr(req->qpfxv[i], ':', len); + if (!c) + errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]); + XPFX = c + 1; + *c = 0; + req->srch->qp->add_prefix(req->qpfxv[i], XPFX); + } + req->srch->qp_extra_done = true; +} + static void free_srch(void *p) // tdestroy { struct srch *srch = (struct srch *)p; @@ -665,12 +693,17 @@ static void dispatch(struct req *req) if (*end || req->threadid == ULLONG_MAX) ABORT("-T %s", optarg); break; + case 'Q': + req->qpfxv[req->qpfxc++] = optarg; + if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q"); + break; default: ABORT("bad switch `-%c'", c); } } ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch kbuf.srch->db = NULL; kbuf.srch->qp = NULL; + kbuf.srch->qp_extra_done = false; kbuf.srch->paths_len = size - offsetof(struct srch, paths); if (kbuf.srch->paths_len <= 0) ABORT("no -d args"); @@ -687,6 +720,8 @@ static void dispatch(struct req *req) 
free_srch(kbuf.srch); goto cmd_err; // srch_init already warned } + if (req->qpfxc && !req->srch->qp_extra_done) + srch_init_extra(req); if (req->timeout_sec) alarm(req->timeout_sec > UINT_MAX ? UINT_MAX : (unsigned)req->timeout_sec); diff --git a/t/www_altid.t b/t/www_altid.t index de1e6ed6..7ad4a1d2 100644 --- a/t/www_altid.t +++ b/t/www_altid.t @@ -6,7 +6,7 @@ use PublicInbox::Config; use PublicInbox::Spawn qw(spawn); require_cmd('sqlite3'); require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape - Plack::Builder IO::Uncompress::Gunzip)); + Plack::Builder IO::Uncompress::Gunzip Xapian)); use_ok($_) for qw(Plack::Test HTTP::Request::Common); require_ok 'PublicInbox::Msgmap'; require_ok 'PublicInbox::AltId'; @@ -14,17 +14,13 @@ require_ok 'PublicInbox::WWW'; my ($tmpdir, $for_destroy) = tmpdir(); my $aid = 'xyz'; my $cfgpath; -my $ibx = create_inbox 'test', indexlevel => 'basic', sub { +my $spec = "serial:$aid:file=blah.sqlite3"; +my $ibx = create_inbox 'test-altid', indexlevel => 'medium', + altid => [ $spec ], sub { my ($im, $ibx) = @_; - $im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; -From: a@example.com -Message-Id: - -EOF - # $im->done; - my $spec = "serial:$aid:file=blah.sqlite3"; my $altid = PublicInbox::AltId->new($ibx, $spec, 1); $altid->mm_alt->mid_set(1, 'a@example.com'); + undef $altid; $cfgpath = "$ibx->{inboxdir}/cfg"; open my $fh, '>', $cfgpath or BAIL_OUT "open $cfgpath: $!"; print $fh <add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT; +From: a@example.com +Message-Id: + +EOF }; $cfgpath //= "$ibx->{inboxdir}/cfg"; my $cfg = PublicInbox::Config->new($cfgpath); @@ -56,6 +57,11 @@ my $client = sub { is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid'); $mm_cmp = undef; unlink $cmpfile or die; + + $res = $cb->(GET('/test/?q=xyz:1')); + is $res->code, 200, 'altid search hit'; + $res = $cb->(GET('/test/?q=xyz:10')); + is $res->code, 404, 'altid search miss'; }; test_psgi(sub { $www->call(@_) }, $client); SKIP: {