unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/12] lei q: retrieve keywords for local, non-external messages
Date: Thu, 21 Jan 2021 19:46:14 +0000	[thread overview]
Message-ID: <20210121194624.32002-3-e@80x24.org> (raw)
In-Reply-To: <20210121194624.32002-1-e@80x24.org>

This isn't tested for now, so maybe it works.
---
 lib/PublicInbox/LeiOverview.pm |  8 +++-----
 lib/PublicInbox/LeiSearch.pm   | 16 +++-------------
 lib/PublicInbox/LeiXSearch.pm  | 14 ++++++++++----
 lib/PublicInbox/Search.pm      | 20 +++++++++++++++++++-
 4 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 8799f1cc..47d9eb31 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -224,9 +224,8 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		my $git_dir = $git->{git_dir};
 		sub {
 			my ($smsg, $mitem) = @_;
-			my $kw = []; # TODO get from mitem
 			$l2m->wq_do('write_mail', \@io, $git_dir,
-					$smsg->{blob}, $lei_ipc, $kw)
+					$smsg->{blob}, $lei_ipc, $smsg->{kw});
 		}
 	} elsif ($l2m) {
 		my $wcb = $l2m->write_cb($lei);
@@ -235,8 +234,8 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		my $g2m = $l2m->can('git_to_mail');
 		sub {
 			my ($smsg, $mitem) = @_;
-			my $kw = []; # TODO get from mitem
-			$git->cat_async($smsg->{blob}, $g2m, [ $wcb, $kw ]);
+			$git->cat_async($smsg->{blob}, $g2m,
+					[ $wcb, $smsg->{kw} ]);
 		};
 	} elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
 		my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
@@ -266,7 +265,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 		$lei->{ovv_buf} = \(my $buf = '');
 		sub {
 			my ($smsg, $mitem) = @_;
-			delete @$smsg{qw(tid num)};
 			$buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
 			if (length($buf) > 65536) {
 				my $lk = $self->lock_for_scope;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index b7e337de..440bacf5 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -5,7 +5,7 @@ package PublicInbox::LeiSearch;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::ExtSearch);
-use PublicInbox::Search;
+use PublicInbox::Search qw(xap_terms);
 
 # get combined docid from over.num:
 # (not generic Xapian, only works with our sharding scheme)
@@ -19,19 +19,9 @@ sub msg_keywords {
 	my ($self, $num) = @_; # num_or_mitem
 	my $xdb = $self->xdb; # set {nshard};
 	my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
-	my %kw;
-	eval {
-		my $end = $xdb->termlist_end($docid);
-		my $cur = $xdb->termlist_begin($docid);
-		for (; $cur != $end; $cur++) {
-			$cur->skip_to('K');
-			last if $cur == $end;
-			my $kw = $cur->get_termname;
-			$kw =~ s/\AK//s and $kw{$kw} = undef;
-		}
-	};
+	my $kw = xap_terms('K', $xdb, $docid);
 	warn "E: #$docid ($num): $@\n" if $@;
-	wantarray ? sort(keys(%kw)) : \%kw;
+	wantarray ? sort(keys(%$kw)) : $kw;
 }
 
 1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index a6d827de..d7688ede 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -13,6 +13,7 @@ use PublicInbox::OpPipe;
 use PublicInbox::Import;
 use File::Temp 0.19 (); # 0.19 for ->newdir
 use File::Spec ();
+use PublicInbox::Search qw(xap_terms);
 
 sub new {
 	my ($class) = @_;
@@ -74,7 +75,12 @@ sub smsg_for {
 	my $docid = $mitem->get_docid;
 	my $shard = ($docid - 1) % $nshard;
 	my $num = int(($docid - 1) / $nshard) + 1;
-	my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num);
+	my $ibx = $self->{shard2ibx}->[$shard];
+	my $smsg = $ibx->over->get_art($num);
+	if (ref($ibx->can('msg_keywords'))) {
+		my $kw = xap_terms('K', $mitem->get_document);
+		$smsg->{kw} = [ sort keys %$kw ];
+	}
 	$smsg->{docid} = $docid;
 	$smsg;
 }
@@ -153,11 +159,11 @@ sub query_mset { # non-parallel for non-"--thread" users
 	$dedupe->prepare_dedupe;
 	do {
 		$mset = $self->mset($mo->{qstr}, $mo);
-		for my $it ($mset->items) {
-			my $smsg = smsg_for($self, $it) or next;
+		for my $mitem ($mset->items) {
+			my $smsg = smsg_for($self, $mitem) or next;
 			wait_startq($startq) if $startq;
 			next if $dedupe->is_smsg_dup($smsg);
-			$each_smsg->($smsg, $it);
+			$each_smsg->($smsg, $mitem);
 		}
 	} while (_mset_more($mset, $mo));
 	undef $each_smsg; # drops @io for l2m->{each_smsg_done}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index a4b40f94..7c6a16be 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -6,7 +6,7 @@
 package PublicInbox::Search;
 use strict;
 use parent qw(Exporter);
-our @EXPORT_OK = qw(retry_reopen int_val get_pct);
+our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
 use List::Util qw(max);
 
 # values for searching, changing the numeric value breaks
@@ -432,4 +432,22 @@ sub get_pct ($) { # mset item
 	$n > 99 ? 99 : $n;
 }
 
+sub xap_terms ($$;@) {
+	my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty ()
+	my %ret;
+	eval {
+		my $end = $xdb_or_doc->termlist_end(@docid);
+		my $cur = $xdb_or_doc->termlist_begin(@docid);
+		for (; $cur != $end; $cur++) {
+			$cur->skip_to($pfx);
+			last if $cur == $end;
+			my $tn = $cur->get_termname;
+			if (index($tn, $pfx) == 0) {
+				$ret{substr($tn, length($pfx))} = undef;
+			}
+		}
+	};
+	\%ret;
+}
+
 1;

  parent reply	other threads:[~2021-01-21 19:46 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-21 19:46 [PATCH 00/12] lei: another dump Eric Wong
2021-01-21 19:46 ` [PATCH 01/12] lei_overview: rename {relevance} => {pct} Eric Wong
2021-01-21 19:46 ` Eric Wong [this message]
2021-01-21 19:46 ` [PATCH 03/12] lei_xsearch: eliminate some unused, commented-out code Eric Wong
2021-01-21 19:46 ` [PATCH 04/12] lei: show {pct} and {oid} in From_ lines and filenames Eric Wong
2021-01-21 19:46 ` [PATCH 05/12] lei: fix inadvertant FD sharing Eric Wong
2021-01-21 19:46 ` [PATCH 06/12] lei_to_mail: avoid segfault on exit Eric Wong
2021-01-21 19:46 ` [PATCH 07/12] lei: oneshot: use client $io[2] for placeholder Eric Wong
2021-01-21 19:46 ` [PATCH 08/12] lei: remove INT/QUIT/TERM handlers, fix daemon EOF Eric Wong
2021-01-21 19:46 ` [PATCH 09/12] lei_xsearch: reduce reference paths to lxs Eric Wong
2021-01-21 19:46 ` [PATCH 10/12] lei: remove @TO_CLOSE_ATFORK_CHILD Eric Wong
2021-01-21 19:46 ` [PATCH 11/12] lei: forget-external support with canonicalization Eric Wong
2021-01-21 19:46 ` [PATCH 12/12] lei forget-external: bash completion support Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210121194624.32002-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).