From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 02/12] lei q: retrieve keywords for local, non-external messages
Date: Thu, 21 Jan 2021 19:46:14 +0000 [thread overview]
Message-ID: <20210121194624.32002-3-e@80x24.org> (raw)
In-Reply-To: <20210121194624.32002-1-e@80x24.org>
This isn't tested for now, so maybe it works.
---
lib/PublicInbox/LeiOverview.pm | 8 +++-----
lib/PublicInbox/LeiSearch.pm | 16 +++-------------
lib/PublicInbox/LeiXSearch.pm | 14 ++++++++++----
lib/PublicInbox/Search.pm | 20 +++++++++++++++++++-
4 files changed, 35 insertions(+), 23 deletions(-)
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index 8799f1cc..47d9eb31 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -224,9 +224,8 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
my $git_dir = $git->{git_dir};
sub {
my ($smsg, $mitem) = @_;
- my $kw = []; # TODO get from mitem
$l2m->wq_do('write_mail', \@io, $git_dir,
- $smsg->{blob}, $lei_ipc, $kw)
+ $smsg->{blob}, $lei_ipc, $smsg->{kw});
}
} elsif ($l2m) {
my $wcb = $l2m->write_cb($lei);
@@ -235,8 +234,8 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
my $g2m = $l2m->can('git_to_mail');
sub {
my ($smsg, $mitem) = @_;
- my $kw = []; # TODO get from mitem
- $git->cat_async($smsg->{blob}, $g2m, [ $wcb, $kw ]);
+ $git->cat_async($smsg->{blob}, $g2m,
+ [ $wcb, $smsg->{kw} ]);
};
} elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
@@ -266,7 +265,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
$lei->{ovv_buf} = \(my $buf = '');
sub {
my ($smsg, $mitem) = @_;
- delete @$smsg{qw(tid num)};
$buf .= $json->encode(_unbless_smsg(@_)) . $ORS;
if (length($buf) > 65536) {
my $lk = $self->lock_for_scope;
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index b7e337de..440bacf5 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -5,7 +5,7 @@ package PublicInbox::LeiSearch;
use strict;
use v5.10.1;
use parent qw(PublicInbox::ExtSearch);
-use PublicInbox::Search;
+use PublicInbox::Search qw(xap_terms);
# get combined docid from over.num:
# (not generic Xapian, only works with our sharding scheme)
@@ -19,19 +19,9 @@ sub msg_keywords {
my ($self, $num) = @_; # num_or_mitem
my $xdb = $self->xdb; # set {nshard};
my $docid = ref($num) ? $num->get_docid : num2docid($self, $num);
- my %kw;
- eval {
- my $end = $xdb->termlist_end($docid);
- my $cur = $xdb->termlist_begin($docid);
- for (; $cur != $end; $cur++) {
- $cur->skip_to('K');
- last if $cur == $end;
- my $kw = $cur->get_termname;
- $kw =~ s/\AK//s and $kw{$kw} = undef;
- }
- };
+ my $kw = xap_terms('K', $xdb, $docid);
warn "E: #$docid ($num): $@\n" if $@;
- wantarray ? sort(keys(%kw)) : \%kw;
+ wantarray ? sort(keys(%$kw)) : $kw;
}
1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index a6d827de..d7688ede 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -13,6 +13,7 @@ use PublicInbox::OpPipe;
use PublicInbox::Import;
use File::Temp 0.19 (); # 0.19 for ->newdir
use File::Spec ();
+use PublicInbox::Search qw(xap_terms);
sub new {
my ($class) = @_;
@@ -74,7 +75,12 @@ sub smsg_for {
my $docid = $mitem->get_docid;
my $shard = ($docid - 1) % $nshard;
my $num = int(($docid - 1) / $nshard) + 1;
- my $smsg = $self->{shard2ibx}->[$shard]->over->get_art($num);
+ my $ibx = $self->{shard2ibx}->[$shard];
+ my $smsg = $ibx->over->get_art($num);
+ if (ref($ibx->can('msg_keywords'))) {
+ my $kw = xap_terms('K', $mitem->get_document);
+ $smsg->{kw} = [ sort keys %$kw ];
+ }
$smsg->{docid} = $docid;
$smsg;
}
@@ -153,11 +159,11 @@ sub query_mset { # non-parallel for non-"--thread" users
$dedupe->prepare_dedupe;
do {
$mset = $self->mset($mo->{qstr}, $mo);
- for my $it ($mset->items) {
- my $smsg = smsg_for($self, $it) or next;
+ for my $mitem ($mset->items) {
+ my $smsg = smsg_for($self, $mitem) or next;
wait_startq($startq) if $startq;
next if $dedupe->is_smsg_dup($smsg);
- $each_smsg->($smsg, $it);
+ $each_smsg->($smsg, $mitem);
}
} while (_mset_more($mset, $mo));
undef $each_smsg; # drops @io for l2m->{each_smsg_done}
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index a4b40f94..7c6a16be 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -6,7 +6,7 @@
package PublicInbox::Search;
use strict;
use parent qw(Exporter);
-our @EXPORT_OK = qw(retry_reopen int_val get_pct);
+our @EXPORT_OK = qw(retry_reopen int_val get_pct xap_terms);
use List::Util qw(max);
# values for searching, changing the numeric value breaks
@@ -432,4 +432,22 @@ sub get_pct ($) { # mset item
$n > 99 ? 99 : $n;
}
+sub xap_terms ($$;@) { # returns a hashref of terms starting with $pfx, with $pfx stripped
+ my ($pfx, $xdb_or_doc, @docid) = @_; # @docid may be empty () when given a document instead of a DB
+ my %ret; # used as a set: keys are prefix-stripped terms, values are always undef
+ eval { # exceptions are trapped here, so %ret may come back partial or empty
+ my $end = $xdb_or_doc->termlist_end(@docid);
+ my $cur = $xdb_or_doc->termlist_begin(@docid);
+ for (; $cur != $end; $cur++) {
+ $cur->skip_to($pfx); # presumably advances to the first term >= $pfx (confirm against Xapian docs)
+ last if $cur == $end; # ran off the end of the term list: nothing (more) to collect
+ my $tn = $cur->get_termname;
+ if (index($tn, $pfx) == 0) { # only accept terms which really begin with $pfx
+ $ret{substr($tn, length($pfx))} = undef; # store the term without its prefix
+ }
+ }
+ };
+ \%ret; # errors are not rethrown; a caller wanting to report them must inspect $@ itself
+}
+
1;
next prev parent reply other threads:[~2021-01-21 19:46 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-21 19:46 [PATCH 00/12] lei: another dump Eric Wong
2021-01-21 19:46 ` [PATCH 01/12] lei_overview: rename {relevance} => {pct} Eric Wong
2021-01-21 19:46 ` Eric Wong [this message]
2021-01-21 19:46 ` [PATCH 03/12] lei_xsearch: eliminate some unused, commented-out code Eric Wong
2021-01-21 19:46 ` [PATCH 04/12] lei: show {pct} and {oid} in From_ lines and filenames Eric Wong
2021-01-21 19:46 ` [PATCH 05/12] lei: fix inadvertent FD sharing Eric Wong
2021-01-21 19:46 ` [PATCH 06/12] lei_to_mail: avoid segfault on exit Eric Wong
2021-01-21 19:46 ` [PATCH 07/12] lei: oneshot: use client $io[2] for placeholder Eric Wong
2021-01-21 19:46 ` [PATCH 08/12] lei: remove INT/QUIT/TERM handlers, fix daemon EOF Eric Wong
2021-01-21 19:46 ` [PATCH 09/12] lei_xsearch: reduce reference paths to lxs Eric Wong
2021-01-21 19:46 ` [PATCH 10/12] lei: remove @TO_CLOSE_ATFORK_CHILD Eric Wong
2021-01-21 19:46 ` [PATCH 11/12] lei: forget-external support with canonicalization Eric Wong
2021-01-21 19:46 ` [PATCH 12/12] lei forget-external: bash completion support Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210121194624.32002-3-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).