From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 74A9A1FA01 for ; Thu, 25 Feb 2021 10:11:07 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/4] lei q: -tt marks direct hits as "flagged" Date: Thu, 25 Feb 2021 10:11:06 +0000 Message-Id: <20210225101106.12505-5-e@80x24.org> In-Reply-To: <20210225101106.12505-1-e@80x24.org> References: <20210225101106.12505-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This can be used to quickly distinguish messages which were direct hits when doing thread expansion vs messages that were merely part of the same thread. This is NOT mairix-derived behavior, but I occasionally found it useful when looking at results in an MUA to know whether a message was a direct hit or not. This makes "-t" consistent with non-"-t" cases as far as keyword reading goes. --- Documentation/lei-q.pod | 8 ++++++ MANIFEST | 1 + lib/PublicInbox/LEI.pm | 4 +-- lib/PublicInbox/LeiXSearch.pm | 21 +++++++++++++--- t/lei-q-thread.t | 47 +++++++++++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 6 deletions(-) create mode 100644 t/lei-q-thread.t diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod index 75fdc613..0959beac 100644 --- a/Documentation/lei-q.pod +++ b/Documentation/lei-q.pod @@ -79,6 +79,14 @@ Augment output destination instead of clobbering it. Return all messages in the same thread as the actual match(es). +Using this twice (C<-tt>) sets the C<flagged> keyword (AKA "important") +on messages which were actual matches. This is useful to distinguish +messages which were direct hits from messages which were merely part +of the same thread. 
+ +TODO: Warning: this flag may become persistent and saved in +lei/store unless an MUA unflags it! (Behavior undecided) + =item -d STRATEGY, --dedupe=STRATEGY Strategy for deduplicating messages: C<content>, C<oid>, C<mid>, or diff --git a/MANIFEST b/MANIFEST index adbd108f..9cf33d48 100644 --- a/MANIFEST +++ b/MANIFEST @@ -373,6 +373,7 @@ t/lei-import-nntp.t t/lei-import.t t/lei-mirror.t t/lei-q-remote-import.t +t/lei-q-thread.t t/lei.t t/lei_dedupe.t t/lei_external.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 8eb96e78..8825fa43 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -109,7 +109,7 @@ sub index_opt { # command => [ positional_args, 1-line description, Getopt::Long option spec ] our %CMD = ( # sorted in order of importance/use: 'q' => [ '--stdin|SEARCH_TERMS...', 'search for messages matching terms', qw( - save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t augment|a + save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t+ augment|a sort|s=s reverse|r offset=i remote! local! external! pretty include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin| import-remote! @@ -233,7 +233,7 @@ my %OPTDESC = ( 'dedupe|d=s' => ['STRATEGY|content|oid|mid|none', 'deduplication strategy'], 'show threads|t' => 'display entire thread a message belongs to', -'q threads|t' => +'q threads|t+' => 'return all messages in the same threads as the actual match(es)', 'alert=s@' => ['CMD,:WINCH,:bell,', 'run command(s) or perform ops when done writing to output ' . 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 2d399653..eb015978 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -66,6 +66,13 @@ sub remotes { @{$_[0]->{remotes} // []} } # called by PublicInbox::Search::xdb sub xdb_shards_flat { @{$_[0]->{shards_flat} // []} } +sub mitem_kw ($$;$) { + my ($smsg, $mitem, $flagged) = @_; + my $kw = xap_terms('K', $mitem->get_document); + $kw->{flagged} = 1 if $flagged; + $smsg->{kw} = [ sort keys %$kw ]; +} + # like over->get_art sub smsg_for { my ($self, $mitem) = @_; @@ -76,10 +83,7 @@ sub smsg_for { my $num = int(($docid - 1) / $nshard) + 1; my $ibx = $self->{shard2ibx}->[$shard]; my $smsg = $ibx->over->get_art($num); - if (ref($ibx->can('msg_keywords'))) { - my $kw = xap_terms('K', $mitem->get_document); - $smsg->{kw} = [ sort keys %$kw ]; - } + mitem_kw($smsg, $mitem) if $ibx->can('msg_keywords'); $smsg->{docid} = $docid; $smsg; } @@ -143,6 +147,8 @@ sub query_thread_mset { # for --threads my $mo = { %{$lei->{mset_opt}} }; my $mset; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish); + my $can_kw = !!$ibxish->can('msg_keywords'); + my $fl = $lei->{opt}->{threads} > 1; do { $mset = $srch->mset($mo->{qstr}, $mo); mset_progress($lei, $desc, $mset->size, @@ -156,6 +162,13 @@ sub query_thread_mset { # for --threads my $smsg = $over->get_art($n) or next; wait_startq($lei); my $mitem = delete $n2item{$smsg->{num}}; + if ($mitem) { + if ($can_kw) { + mitem_kw($smsg, $mitem, $fl); + } else { + $smsg->{kw} = [ 'flagged' ]; + } + } $each_smsg->($smsg, $mitem); } @{$ctx->{xids}} = (); diff --git a/t/lei-q-thread.t b/t/lei-q-thread.t new file mode 100644 index 00000000..66db28a9 --- /dev/null +++ b/t/lei-q-thread.t @@ -0,0 +1,47 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +require_git 2.6; +require_mods(qw(json DBD::SQLite Search::Xapian)); +use PublicInbox::LeiToMail; +my 
($ro_home, $cfg_path) = setup_public_inboxes; +test_lei(sub { + my $eml = eml_load('t/utf8.eml'); + my $buf = PublicInbox::LeiToMail::eml2mboxrd($eml, { kw => ['seen'] }); + lei_ok([qw(import -F mboxrd -)], undef, { 0 => $buf, %$lei_opt }); + + lei_ok qw(q -t m:testmessage@example.com); + my $res = json_utf8->decode($lei_out); + is_deeply($res->[0]->{kw}, [ 'seen' ], 'q -t sets keywords'); + + $eml = eml_load('t/utf8.eml'); + $eml->header_set('References', $eml->header('Message-ID')); + $eml->header_set('Message-ID', ''); + $buf = PublicInbox::LeiToMail::eml2mboxrd($eml, { kw => ['draft'] }); + lei_ok([qw(import -F mboxrd -)], undef, { 0 => $buf, %$lei_opt }); + + lei_ok qw(q -t m:testmessage@example.com); + $res = json_utf8->decode($lei_out); + is(scalar(@$res), 3, 'got 2 results'); + pop @$res; + my %m = map { $_->{'m'} => $_ } @$res; + is_deeply($m{''}->{kw}, ['seen'], + 'flag set in direct hit'); + 'TODO' or is_deeply($m{''}->{kw}, ['draft'], + 'flag set in thread hit'); + + lei_ok qw(q -t -t m:testmessage@example.com); + $res = json_utf8->decode($lei_out); + is(scalar(@$res), 3, 'got 2 results with -t -t'); + pop @$res; + %m = map { $_->{'m'} => $_ } @$res; + is_deeply($m{''}->{kw}, ['flagged', 'seen'], + 'flagged set in direct hit'); + 'TODO' or is_deeply($m{''}->{kw}, ['draft'], + 'flagged set in direct hit'); + lei_ok qw(q -t -t m:testmessage@example.com --only), "$ro_home/t2"; + $res = json_utf8->decode($lei_out); + is_deeply($res->[0]->{kw}, [ 'flagged' ], 'flagged set on external'); +}); +done_testing;