From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 918381FA00 for ; Fri, 26 Mar 2021 04:29:37 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] lei: add some labels support Date: Fri, 26 Mar 2021 06:29:37 +0200 Message-Id: <20210326042937.15913-4-e@80x24.org> In-Reply-To: <20210326042937.15913-1-e@80x24.org> References: <20210326042937.15913-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: "lei q" now displays labels in JSON output, "lei mark" can add or remove labels for any messages. "lei ls-label" is supported, too. Unfortunately, "lei q" won't handle "kw:" or "L:" for external messages; they must be imported first. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 1 + lib/PublicInbox/LeiLsLabel.pm | 17 ++++++++++ lib/PublicInbox/LeiMark.pm | 6 ++-- lib/PublicInbox/LeiOverview.pm | 4 +-- lib/PublicInbox/LeiSearch.pm | 37 ++++++++++++++++++--- lib/PublicInbox/LeiStore.pm | 59 +++++++++++++++++++++++++--------- lib/PublicInbox/LeiXSearch.pm | 13 +++++--- lib/PublicInbox/Search.pm | 6 ++-- lib/PublicInbox/SearchIdx.pm | 2 +- t/lei-mark.t | 46 +++++++++++++++++++++++--- 11 files changed, 156 insertions(+), 36 deletions(-) create mode 100644 lib/PublicInbox/LeiLsLabel.pm diff --git a/MANIFEST b/MANIFEST index 87e4b616..6b2b33ac 100644 --- a/MANIFEST +++ b/MANIFEST @@ -188,6 +188,7 @@ lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiInput.pm +lib/PublicInbox/LeiLsLabel.pm lib/PublicInbox/LeiMark.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index b42ba0ae..fab2af90 100644 ---
a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -145,6 +145,7 @@ our %CMD = ( # sorted in order of importance/use: PublicInbox::LeiQuery::curl_opt() ], 'ls-external' => [ '[FILTER]', 'list publicinbox|extindex locations', qw(format|f=s z|0 globoff|g invert-match|v local remote), @c_opt ], +'ls-label' => [ '', 'list labels', qw(z|0 stats:s), @c_opt ], 'forget-external' => [ 'LOCATION...|--prune', 'exclude further results from a publicinbox|extindex', qw(prune), @c_opt ], diff --git a/lib/PublicInbox/LeiLsLabel.pm b/lib/PublicInbox/LeiLsLabel.pm new file mode 100644 index 00000000..474224d4 --- /dev/null +++ b/lib/PublicInbox/LeiLsLabel.pm @@ -0,0 +1,17 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# "lei ls-label" command +package PublicInbox::LeiLsLabel; +use strict; +use v5.10.1; + +sub lei_ls_label { # the "lei ls-label" method + my ($lei, @argv) = @_; + # TODO: document stats/counts (expensive) + my @L = eval { $lei->_lei_store->search->all_terms('L') }; + my $ORS = $lei->{opt}->{z} ? 
"\0" : "\n"; + $lei->out(map { $_.$ORS } @L); +} + +1; diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm index 9d77f4b4..7a2ccf77 100644 --- a/lib/PublicInbox/LeiMark.pm +++ b/lib/PublicInbox/LeiMark.pm @@ -60,7 +60,7 @@ sub vmd_mod_extract { sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh my ($self, $eml) = @_; if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) { - $self->{lei}->{sto}->ipc_do('update_xvmd', $xoids, + $self->{lei}->{sto}->ipc_do('update_xvmd', $xoids, $eml, $self->{vmd_mod}); } else { ++$self->{missing}; @@ -168,7 +168,9 @@ sub _complete_mark_common ($) { # FIXME: same problems as _complete_forget_external and similar sub _complete_mark { my ($self, @argv) = @_; - my @all = map { ("+kw:$_", "-kw:$_") } @KW; + my @L = eval { $self->_lei_store->search->all_terms('L') }; + my @all = ((map { ("+kw:$_", "-kw:$_") } @KW), + (map { ("+L:$_", "-L:$_") } @L)); return @all if !@argv; my ($cur, $re) = _complete_mark_common(\@argv); map { diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 1ce2a098..b4d81328 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -227,7 +227,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); - $lse->xsmsg_vmd($smsg); + $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1); $smsg = _unbless_smsg($smsg, $mitem); $buf .= "{\n"; $buf .= join(",\n", map { @@ -251,7 +251,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); - $lse->xsmsg_vmd($smsg); + $lse->xsmsg_vmd($smsg, $smsg->{L} ? undef : 1); $buf .= $json->encode(_unbless_smsg(@_)) . 
$ORS; return if length($buf) < 65536; my $lk = $self->lock_for_scope; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index bbb00661..07d570ec 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -27,18 +27,25 @@ sub msg_keywords { wantarray ? sort(keys(%$kw)) : $kw; } +# lookup keywords+labels for external messages sub xsmsg_vmd { - my ($self, $smsg) = @_; + my ($self, $smsg, $want_label) = @_; return if $smsg->{kw}; my $xdb = $self->xdb; # set {nshard}; - my %kw; + my (%kw, %L, $doc, $x); $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged}); my @num = $self->over->blob_exists($smsg->{blob}); for my $num (@num) { # there should only be one... - my $kw = xap_terms('K', $xdb, num2docid($self, $num)); - %kw = (%kw, %$kw); + $doc = $xdb->get_document(num2docid($self, $num)); + $x = xap_terms('K', $doc); + %kw = (%kw, %$x); + if ($want_label) { # JSON/JMAP only + $x = xap_terms('L', $doc); + %L = (%L, %$x); + } } $smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw)); + $smsg->{L} = [ sort keys %L ] if scalar(keys(%L)); } # when a message has no Message-IDs at all, this is needed for @@ -100,4 +107,26 @@ sub kw_changed { join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 0 : 1; } +sub all_terms { + my ($self, $pfx) = @_; + my $xdb = $self->xdb; + my $cur = $xdb->allterms_begin($pfx); + my $end = $xdb->allterms_end($pfx); + my %ret; + for (; $cur != $end; $cur++) { + my $tn = $cur->get_termname; + index($tn, $pfx) == 0 and + $ret{substr($tn, length($pfx))} = undef; + } + wantarray ? 
(sort keys %ret) : \%ret; +} + +sub qparse_new { + my ($self) = @_; + my $qp = $self->SUPER::qparse_new; # PublicInbox::Search + $qp->add_boolean_prefix('kw', 'K'); + $qp->add_boolean_prefix('L', 'L'); + $qp +} + 1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 1311ad46..b76af4d3 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -228,8 +228,30 @@ sub set_eml { set_eml_vmd($self, $eml, $vmd); } +sub _external_only ($$$) { + my ($self, $xoids, $eml) = @_; + my $eidx = $self->{priv_eidx}; + my $oidx = $eidx->{oidx} // die 'BUG: {oidx} missing'; + my $smsg = bless { blob => '' }, 'PublicInbox::Smsg'; + $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); + # save space for an externals-only message + my $hdr = $eml->header_obj; + $smsg->populate($hdr); # sets lines == 0 + $smsg->{bytes} = 0; + delete @$smsg{qw(From Subject)}; + $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; + $oidx->add_overview($hdr, $smsg); # subject+references for threading + $smsg->{subject} = ''; + for my $oid (keys %$xoids) { + $oidx->add_xref3($smsg->{num}, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($smsg->{num}); + $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg); + ($smsg, $idx); +} + sub update_xvmd { - my ($self, $xoids, $vmd_mod) = @_; + my ($self, $xoids, $eml, $vmd_mod) = @_; my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; my %seen; @@ -242,7 +264,25 @@ sub update_xvmd { my $idx = $eidx->idx_shard($docid); $idx->ipc_do('update_vmd', $docid, $vmd_mod); } + delete $xoids->{$oid}; } + return unless scalar(keys(%$xoids)); + + # see if it was indexed, but with different OID(s) + if (my @docids = _docids_for($self, $eml)) { + for my $docid (@docids) { + next if $seen{$docid}; + for my $oid (keys %$xoids) { + $oidx->add_xref3($docid, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('update_vmd', $docid, $vmd_mod); + } + return; + } + # totally unseen + my ($smsg, $idx) = _external_only($self, 
$xoids, $eml); + $idx->ipc_do('update_vmd', $smsg->{num}, $vmd_mod); } # set or update keywords for external message, called via ipc_do @@ -270,6 +310,7 @@ sub set_xvmd { # see if it was indexed, but with different OID(s) if (my @docids = _docids_for($self, $eml)) { for my $docid (@docids) { + next if $seen{$docid}; for my $oid (keys %$xoids) { $oidx->add_xref3($docid, -1, $oid, '.'); } @@ -279,21 +320,7 @@ sub set_xvmd { return; } # totally unseen - my $smsg = bless { blob => '' }, 'PublicInbox::Smsg'; - $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); - # save space for an externals-only message - my $hdr = $eml->header_obj; - $smsg->populate($hdr); # sets lines == 0 - $smsg->{bytes} = 0; - delete @$smsg{qw(From Subject)}; - $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; - $oidx->add_overview($hdr, $smsg); # subject+references for threading - $smsg->{subject} = ''; - for my $oid (keys %$xoids) { - $oidx->add_xref3($smsg->{num}, -1, $oid, '.'); - } - my $idx = $eidx->idx_shard($smsg->{num}); - $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg); + my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); } diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 386c4eba..f64b2c62 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -69,11 +69,13 @@ sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} } sub mitem_kw ($$;$) { my ($smsg, $mitem, $flagged) = @_; - my $kw = xap_terms('K', $mitem->get_document); + my $kw = xap_terms('K', my $doc = $mitem->get_document); $kw->{flagged} = 1 if $flagged; - # we keep the empty array here to prevent expensive work in + # we keep the empty {kw} array here to prevent expensive work in # ->xsmsg_vmd, _unbless_smsg will clobber it iff it's empty $smsg->{kw} = [ sort keys %$kw ]; + my $L = xap_terms('L', $doc); + $smsg->{L} = [ sort keys %$L ] if scalar(keys %$L); } # like over->get_art @@ -86,8 +88,10 @@ sub smsg_for { my $num 
= int(($docid - 1) / $nshard) + 1; my $ibx = $self->{shard2ibx}->[$shard]; my $smsg = $ibx->over->get_art($num); - return if $smsg->{bytes} == 0; - mitem_kw($smsg, $mitem) if $ibx->can('msg_keywords'); + return if $smsg->{bytes} == 0; # external message + if ($ibx->can('msg_keywords')) { + mitem_kw($smsg, $mitem); + } $smsg; } @@ -170,6 +174,7 @@ sub query_thread_mset { # for --threads if ($can_kw) { mitem_kw($smsg, $mitem, $fl); } elsif ($fl) { + # call ->xsmsg_vmd, later $smsg->{lei_q_tt_flagged} = 1; } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index c7d52daf..ab04d430 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -370,7 +370,7 @@ sub query_approxidate { sub mset { my ($self, $query_string, $opts) = @_; $opts ||= {}; - my $qp = $self->{qp} //= qparse_new($self); + my $qp = $self->{qp} //= $self->qparse_new; my $query = $qp->parse_query($query_string, $self->{qp_flags}); _do_enquire($self, $query, $opts); } @@ -463,7 +463,7 @@ sub mset_to_smsg { sub stemmer { $X{Stem}->new($LANG) } # read-only -sub qparse_new ($) { +sub qparse_new { my ($self) = @_; my $xdb = xdb($self); @@ -516,7 +516,7 @@ EOF sub help { my ($self) = @_; - $self->{qp} //= qparse_new($self); # parse altids + $self->{qp} //= $self->qparse_new; # parse altids my @ret = @HELP; if (my $user_pfx = $self->{-user_pfx}) { push @ret, @$user_pfx; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7d46489c..ca1f3588 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -35,7 +35,7 @@ use constant DEBUG => !!$ENV{DEBUG}; my $xapianlevels = qr/\A(?:full|medium)\z/; my $hex = '[a-f0-9]'; my $OID = $hex .'{40,}'; -my @VMD_MAP = (kw => 'K', label => 'L'); +my @VMD_MAP = (kw => 'K', L => 'L'); our $INDEXLEVELS = qr/\A(?:full|medium|basic)\z/; sub new { diff --git a/t/lei-mark.t b/t/lei-mark.t index 76995589..23f5002e 100644 --- a/t/lei-mark.t +++ b/t/lei-mark.t @@ -4,22 +4,32 @@ use strict; use v5.10.1; use 
PublicInbox::TestCommon; require_git 2.6; require_mods(qw(json DBD::SQLite Search::Xapian)); +my ($ro_home, $cfg_path) = setup_public_inboxes; my $check_kw = sub { my ($exp, %opt) = @_; + my $args = $opt{args} // []; my $mid = $opt{mid} // 'testmessage@example.com'; - lei_ok('q', "m:$mid"); + lei_ok('q', "m:$mid", @$args); my $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only got one result'); my $msg = $opt{msg} ? " $opt{msg}" : ''; ($exp ? is_deeply($res->[0]->{kw}, $exp, "got @$exp$msg") : is($res->[0]->{kw}, undef, "got undef$msg")) or diag explain($res); + if (exists $opt{L}) { + $exp = $opt{L}; + ($exp ? is_deeply($res->[0]->{L}, $exp, "got @$exp$msg") + : is($res->[0]->{L}, undef, "got undef$msg")) or + diag explain($res); + } }; test_lei(sub { + lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet'); lei_ok(qw(import -F eml t/utf8.eml)); - lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged)); - $check_kw->(['flagged']); + lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged +L:urgent)); + $check_kw->(['flagged'], L => ['urgent']); + lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found'); ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected'); like($lei_err, qr/`seeen' is not one of/, 'got helpful error'); ok(!lei(qw(mark -F eml t/utf8.eml +k:seen)), 'bad prefix rejected'); @@ -41,7 +51,35 @@ test_lei(sub { $check_kw->(['answered'], msg => 'Maildir Status ignored'); open my $in, '<', 't/utf8.eml' or BAIL_OUT $!; - lei_ok([qw(mark -F eml - +kw:seen)], undef, { %$lei_opt, 0 => $in }); + lei_ok([qw(mark -F eml - +kw:seen +L:nope)], + undef, { %$lei_opt, 0 => $in }); $check_kw->(['answered', 'seen'], msg => 'stdin works'); + lei_ok(qw(q L:urgent)); + my $res = json_utf8->decode($lei_out); + is($res->[0]->{'m'}, 'testmessage@example.com', 'L: query works'); + lei_ok(qw(q kw:seen)); + my $r2 = json_utf8->decode($lei_out); + is_deeply($r2, $res, 'kw: query works, too') or + diag explain([$r2, $res]); + + lei_ok(qw(_complete lei mark)); + my %c = 
map { $_ => 1 } split(/\s+/, $lei_out); + ok($c{'+L:urgent'} && $c{'-L:urgent'} && + $c{'+L:nope'} && $c{'-L:nope'}, 'completed with labels'); + + my $mid = 'qp@example.com'; + lei_ok qw(q -f mboxrd --only), "$ro_home/t2", "mid:$mid"; + $in = $lei_out; + lei_ok [qw(mark -F mboxrd --stdin +kw:seen +L:qp)], + undef, { %$lei_opt, 0 => \$in }; + $check_kw->(['seen'], L => ['qp'], mid => $mid, + args => [ '--only', "$ro_home/t2" ], + msg => 'external-only message'); + lei_ok(qw(ls-label)); + is($lei_out, "nope\nqp\nurgent\n", 'ls-label shows qp'); + + if (0) { # TODO label+kw search w/ externals + lei_ok(qw(q L:qp), "mid:$mid", '--only', "$ro_home/t2"); + } }); done_testing;