unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] search: fix altid search with XapHelper process
@ 2024-05-05 23:35 Eric Wong
  2024-05-06  6:08 ` Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2024-05-05 23:35 UTC (permalink / raw)
  To: meta

External Xapian helper processes need to support non-standard
QueryParser prefixes.  The only way to do this is to specify
these prefixes in every `mset' request, since we have no way of
knowing whether the XH worker servicing the request has already
initialized the extra prefixes.
---
 lib/PublicInbox/Search.pm    | 20 +++++++++++++++-----
 lib/PublicInbox/XapHelper.pm | 14 ++++++++++++++
 lib/PublicInbox/xap_helper.h | 35 +++++++++++++++++++++++++++++++++++
 t/www_altid.t                | 24 +++++++++++++++---------
 4 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index fbdb48a3..e5c5d6ab 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -92,6 +92,7 @@ our @XH_SPEC = (
 	'K=i', # timeout kill after i seconds
 	'O=s', # eidx_key
 	'T=i', # threadid
+	'Q=s@', # query prefixes "$user_prefix[:=]$XPREFIX"
 );
 
 sub load_xapian () {
@@ -435,8 +436,8 @@ sub xhc_start_maybe (@) {
 	$xhc;
 }
 
-sub xh_opt ($) {
-	my ($opt) = @_;
+sub xh_opt ($$) {
+	my ($self, $opt) = @_;
 	my $lim = $opt->{limit} || 50;
 	my @ret;
 	push @ret, '-o', $opt->{offset} if $opt->{offset};
@@ -458,7 +459,16 @@ sub xh_opt ($) {
 	push @ret, '-t' if $opt->{threads};
 	push @ret, '-T', $opt->{threadid} if defined $opt->{threadid};
 	push @ret, '-O', $opt->{eidx_key} if defined $opt->{eidx_key};
-	@ret;
+	my $apfx = $self->{-alt_pfx} //= do {
+		my @tmp;
+		for (grep /\Aserial:/, @{$self->{altid} // []}) {
+			my (undef, $pfx) = split /:/, $_;
+			push @tmp, '-Q', "$pfx=X\U$pfx";
+		}
+		# TODO: arbitrary header indexing goes here
+		\@tmp;
+	};
+	(@ret, @$apfx);
 }
 
 # returns a true value if actually handled asynchronously,
@@ -467,7 +477,7 @@ sub async_mset {
 	my ($self, $qry_str, $opt, $cb, @args) = @_;
 	if ($XHC) { # unconditionally retrieving pct + rank for now
 		xdb($self); # populate {nshards}
-		my @margs = ($self->xh_args, xh_opt($opt));
+		my @margs = ($self->xh_args, xh_opt($self, $opt));
 		my $ret = eval {
 			my $rd = $XHC->mkreq(undef, 'mset', @margs, $qry_str);
 			PublicInbox::XhcMset->maybe_new($rd, $self, $cb, @args);
@@ -630,7 +640,7 @@ EOM
 			$ret .= qq{\tqp->add_boolean_prefix("$name", "$_");\n}
 		}
 	}
-	# TODO: altid support
+	# altid support is handled in xh_opt and srch_init_extra in XH
 	for my $name (sort keys %prob_prefix) {
 		for (split(/ /, $prob_prefix{$name})) {
 			$ret .= qq{\tqp->add_prefix("$name", "$_");\n}
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index 2e20660e..099bc4fe 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -172,6 +172,18 @@ sub cmd_mset { # to be used by WWW + IMAP
 	}
 }
 
+sub srch_init_extra ($) {
+	my ($req) = @_;
+	my $qp = $req->{srch}->{qp};
+	for (@{$req->{Q}}) {
+		my ($upfx, $m, $xpfx) = split /([:=])/;
+		$xpfx // die "E: bad -Q $_";
+		$m = $m eq '=' ? 'add_boolean_prefix' : 'add_prefix';
+		$qp->$m($upfx, $xpfx);
+	}
+	$req->{srch}->{qp_extra_done} = 1;
+}
+
 sub dispatch {
 	my ($req, $cmd, @argv) = @_;
 	my $fn = $req->can("cmd_$cmd") or return;
@@ -195,6 +207,8 @@ sub dispatch {
 		$new->{qp} = $new->qparse_new;
 		$new;
 	};
+	$req->{Q} && !$req->{srch}->{qp_extra_done} and
+		srch_init_extra $req;
 	my $timeo = $req->{K};
 	alarm($timeo) if $timeo;
 	$fn->($req, @argv);
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 3df3ce91..4e809fdd 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -114,6 +114,7 @@ enum exc_iter {
 struct srch {
 	int paths_len; // int for comparisons
 	unsigned qp_flags;
+	bool qp_extra_done;
 	Xapian::Database *db;
 	Xapian::QueryParser *qp;
 	char paths[]; // $shard_path0\0$shard_path1\0...
@@ -126,6 +127,7 @@ typedef bool (*cmd)(struct req *);
 struct req { // argv and pfxv point into global rbuf
 	char *argv[MY_ARG_MAX];
 	char *pfxv[MY_ARG_MAX]; // -A <prefix>
+	char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<XPREFIX>
 	size_t *lenv; // -A <prefix>LENGTH
 	struct srch *srch;
 	char *Pgit_dir;
@@ -139,6 +141,7 @@ struct req { // argv and pfxv point into global rbuf
 	long sort_col; // value column, negative means BoolWeight
 	int argc;
 	int pfxc;
+	int qpfxc;
 	FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
 	bool has_input; // fp[0] is bidirectional
 	bool collapse_threads;
@@ -584,6 +587,31 @@ static bool srch_init(struct req *req)
 	return true;
 }
 
+// setup query parser for altid and arbitrary headers
+static void srch_init_extra(struct req *req)
+{
+	const char *XPFX;
+	for (int i = 0; i < req->qpfxc; i++) {
+		size_t len = strlen(req->qpfxv[i]);
+		char *c = (char *)memchr(req->qpfxv[i], '=', len);
+
+		if (c) { // it's a boolean prefix, e.g. "gmane=XGMANE"
+			XPFX = c + 1;
+			*c = 0;
+			req->srch->qp->add_boolean_prefix(req->qpfxv[i], XPFX);
+			continue;
+		}
+		// maybe it's a non-boolean prefix "blob:XBLOBID"
+		c = (char *)memchr(req->qpfxv[i], ':', len);
+		if (!c)
+			errx(EXIT_FAILURE, "bad -Q %s", req->qpfxv[i]);
+		XPFX = c + 1;
+		*c = 0;
+		req->srch->qp->add_prefix(req->qpfxv[i], XPFX);
+	}
+	req->srch->qp_extra_done = true;
+}
+
 static void free_srch(void *p) // tdestroy
 {
 	struct srch *srch = (struct srch *)p;
@@ -665,12 +693,17 @@ static void dispatch(struct req *req)
 			if (*end || req->threadid == ULLONG_MAX)
 				ABORT("-T %s", optarg);
 			break;
+		case 'Q':
+			req->qpfxv[req->qpfxc++] = optarg;
+			if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+			break;
 		default: ABORT("bad switch `-%c'", c);
 		}
 	}
 	ERR_CLOSE(kfp, EXIT_FAILURE); // may ENOMEM, sets kbuf.srch
 	kbuf.srch->db = NULL;
 	kbuf.srch->qp = NULL;
+	kbuf.srch->qp_extra_done = false;
 	kbuf.srch->paths_len = size - offsetof(struct srch, paths);
 	if (kbuf.srch->paths_len <= 0)
 		ABORT("no -d args");
@@ -687,6 +720,8 @@ static void dispatch(struct req *req)
 		free_srch(kbuf.srch);
 		goto cmd_err; // srch_init already warned
 	}
+	if (req->qpfxc && !req->srch->qp_extra_done)
+		srch_init_extra(req);
 	if (req->timeout_sec)
 		alarm(req->timeout_sec > UINT_MAX ?
 			UINT_MAX : (unsigned)req->timeout_sec);
diff --git a/t/www_altid.t b/t/www_altid.t
index de1e6ed6..7ad4a1d2 100644
--- a/t/www_altid.t
+++ b/t/www_altid.t
@@ -6,7 +6,7 @@ use PublicInbox::Config;
 use PublicInbox::Spawn qw(spawn);
 require_cmd('sqlite3');
 require_mods(qw(DBD::SQLite HTTP::Request::Common Plack::Test URI::Escape
-	Plack::Builder IO::Uncompress::Gunzip));
+	Plack::Builder IO::Uncompress::Gunzip Xapian));
 use_ok($_) for qw(Plack::Test HTTP::Request::Common);
 require_ok 'PublicInbox::Msgmap';
 require_ok 'PublicInbox::AltId';
@@ -14,17 +14,13 @@ require_ok 'PublicInbox::WWW';
 my ($tmpdir, $for_destroy) = tmpdir();
 my $aid = 'xyz';
 my $cfgpath;
-my $ibx = create_inbox 'test', indexlevel => 'basic', sub {
+my $spec = "serial:$aid:file=blah.sqlite3";
+my $ibx = create_inbox 'test-altid', indexlevel => 'medium',
+		altid => [ $spec ], sub {
 	my ($im, $ibx) = @_;
-	$im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
-From: a@example.com
-Message-Id: <a@example.com>
-
-EOF
-	# $im->done;
-	my $spec = "serial:$aid:file=blah.sqlite3";
 	my $altid = PublicInbox::AltId->new($ibx, $spec, 1);
 	$altid->mm_alt->mid_set(1, 'a@example.com');
+	undef $altid;
 	$cfgpath = "$ibx->{inboxdir}/cfg";
 	open my $fh, '>', $cfgpath or BAIL_OUT "open $cfgpath: $!";
 	print $fh <<EOF or BAIL_OUT $!;
@@ -35,6 +31,11 @@ EOF
 	url = http://example.com/test
 EOF
 	close $fh or BAIL_OUT $!;
+	$im->add(PublicInbox::Eml->new(<<'EOF')) or BAIL_OUT;
+From: a@example.com
+Message-Id: <a@example.com>
+
+EOF
 };
 $cfgpath //= "$ibx->{inboxdir}/cfg";
 my $cfg = PublicInbox::Config->new($cfgpath);
@@ -56,6 +57,11 @@ my $client = sub {
 	is($mm_cmp->mid_for(1), 'a@example.com', 'sqlite3 dump valid');
 	$mm_cmp = undef;
 	unlink $cmpfile or die;
+
+	$res = $cb->(GET('/test/?q=xyz:1'));
+	is $res->code, 200, 'altid search hit';
+	$res = $cb->(GET('/test/?q=xyz:10'));
+	is $res->code, 404, 'altid search miss';
 };
 test_psgi(sub { $www->call(@_) }, $client);
 SKIP: {

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-05-06  6:16 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-05-05 23:35 [PATCH] search: fix altid search with XapHelper process Eric Wong
2024-05-06  6:08 ` Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).