From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7BB831F9FD for ; Tue, 23 Mar 2021 05:02:18 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] lei mark: command for (un)setting keywords and labels Date: Tue, 23 Mar 2021 11:02:17 +0600 Message-Id: <20210323050218.28873-2-e@80x24.org> In-Reply-To: <20210323050218.28873-1-e@80x24.org> References: <20210323050218.28873-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Only tested for keywords and labels with file inputs, so far; but it seems to do what it needs to do. There's a bit more redundant code than I'd like, and more opportunities for code sharing in the future. "lei import" will be expanded to support +kw:$KEYWORD and +L:$LABEL in the future. 
--- MANIFEST | 2 + lib/PublicInbox/LEI.pm | 15 ++- lib/PublicInbox/LeiImport.pm | 15 +-- lib/PublicInbox/LeiInput.pm | 11 ++- lib/PublicInbox/LeiMark.pm | 177 +++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiStore.pm | 19 ++++ lib/PublicInbox/SearchIdx.pm | 23 +++++ t/lei-mark.t | 47 ++++++++++ 8 files changed, 288 insertions(+), 21 deletions(-) create mode 100644 lib/PublicInbox/LeiMark.pm create mode 100644 t/lei-mark.t diff --git a/MANIFEST b/MANIFEST index df8440ef..87e4b616 100644 --- a/MANIFEST +++ b/MANIFEST @@ -188,6 +188,7 @@ lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiHelp.pm lib/PublicInbox/LeiImport.pm lib/PublicInbox/LeiInput.pm +lib/PublicInbox/LeiMark.pm lib/PublicInbox/LeiMirror.pm lib/PublicInbox/LeiOverview.pm lib/PublicInbox/LeiP2q.pm @@ -377,6 +378,7 @@ t/lei-import-imap.t t/lei-import-maildir.t t/lei-import-nntp.t t/lei-import.t +t/lei-mark.t t/lei-mirror.t t/lei-p2q.t t/lei-q-kw.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 1e720b89..91c95239 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -157,9 +157,10 @@ our %CMD = ( # sorted in order of importance/use: 'plonk' => [ '--threads|--from=IDENT', 'exclude mail matching From: or threads from non-Message-ID searches', qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ], -'mark' => [ 'MESSAGE_FLAGS...', - 'set/unset keywords on message(s) from stdin', - qw(stdin| oid=s exact by-mid|mid:s), @c_opt ], +'mark' => [ 'KEYWORDS...', + 'set/unset keywords on message(s)', + qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@), @c_opt, + pass_through('-kw:foo for delete') ], 'forget' => [ '[--stdin|--oid=OID|--by-mid=MID]', "exclude message(s) on stdin from `q' search results", qw(stdin| oid=s exact by-mid|mid:s), @c_opt ], @@ -348,7 +349,7 @@ my %CONFIG_KEYS = ( 'leistore.dir' => 'top-level storage location', ); -my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q); # internal workers +my @WQ_KEYS = qw(lxs l2m imp mrr cnv p2q mark); # internal workers # pronounced 
"exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE sub x_it ($$) { @@ -460,7 +461,7 @@ sub lei_atfork_child { open STDERR, '+>&='.fileno($self->{2}) or warn "open $!"; delete $self->{0}; } - delete @$self{qw(cnv)}; + delete @$self{qw(cnv mark imp)}; for (delete @$self{qw(3 old_1 au_done)}) { close($_) if defined($_); } @@ -690,10 +691,6 @@ sub lei_show { my ($self, @argv) = @_; } -sub lei_mark { - my ($self, @argv) = @_; -} - sub _config { my ($self, @argv) = @_; my %env = (%{$self->{env}}, GIT_CONFIG => undef); diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 9ad2ff12..21af28a3 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -78,16 +78,6 @@ sub lei_import { # the main "lei import" method import_start($lei); } -sub ipc_atfork_child { - my ($self) = @_; - my $lei = $self->{lei}; - delete $lei->{imp}; # drop circular ref - $lei->lei_atfork_child; - $self->SUPER::ipc_atfork_child; - $lei->{auth}->do_auth_atfork($self) if $lei->{auth}; - undef; -} - sub _import_maildir { # maildir_each_eml cb my ($f, $kw, $eml, $sto, $set_kw) = @_; $sto->ipc_do('set_eml', $eml, $set_kw ? { kw => $kw }: ()); @@ -137,6 +127,9 @@ sub import_stdin { $self->input_fh($lei->{opt}->{'in-format'}, $in, ''); } -no warnings 'once'; # the following works even when LeiAuth is lazy-loaded +no warnings 'once'; +*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; + +# the following works even when LeiAuth is lazy-loaded *net_merge_all = \&PublicInbox::LeiAuth::net_merge_all; 1; diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 859fdb11..6ad57772 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -45,7 +45,7 @@ error reading $name: $! 
} } -sub prepare_inputs { +sub prepare_inputs { # returns undef on error my ($self, $lei, $inputs) = @_; my $in_fmt = $lei->{opt}->{'in-format'}; if ($lei->{opt}->{stdin}) { @@ -103,4 +103,13 @@ sub prepare_inputs { $self->{inputs} = $inputs; } +sub input_only_atfork_child { + my ($self) = @_; + my $lei = $self->{lei}; + $lei->lei_atfork_child; + PublicInbox::IPC::ipc_atfork_child($self); + $lei->{auth}->do_auth_atfork($self) if $lei->{auth}; + undef; +} + 1; diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm new file mode 100644 index 00000000..aa52ad5a --- /dev/null +++ b/lib/PublicInbox/LeiMark.pm @@ -0,0 +1,177 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# handles "lei mark" command +package PublicInbox::LeiMark; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC PublicInbox::LeiInput); +use PublicInbox::Eml; +use PublicInbox::PktOp qw(pkt_do); + +# JMAP RFC 8621 4.1.1 +my @KW = (qw(seen answered flagged draft), # system + qw(forwarded phishing junk notjunk)); # reserved +# note: RFC 8621 states "Users may add arbitrary keywords to an Email", +# but is it good idea? Stick to the system and reserved ones, for now. +# The "system" ones map to Maildir flags and mbox Status/X-Status headers. +my %KW = map { $_ => 1 } @KW; +my $L_MAX = 244; # Xapian term limit - length('L') + +# RFC 8621, sec 2 (Mailboxes) a "label" for us is a JMAP Mailbox "name" +# "Servers MAY reject names that violate server policy" +my %ERR = ( + L => sub { + my ($label) = @_; + length($label) >= $L_MAX and + return "`$label' too long (must be <= $L_MAX)"; + $label =~ m{\A[a-z0-9_][a-z0-9_\-\./\@\!,]*[a-z0-9]\z} ? + undef : "`$label' is invalid"; + }, + kw => sub { + my ($kw) = @_; + $KW{$kw} ? 
undef : < update_vmd +sub vmd_mod_extract { + my $argv = $_[-1]; + my $vmd_mod = {}; + my @new_argv; + for my $x (@$argv) { + if ($x =~ /\A(\+|\-)(kw|L):(.+)\z/) { + my ($op, $pfx, $val) = ($1, $2, $3); + if (my $err = $ERR{$pfx}->($val)) { + push @{$vmd_mod->{err}}, $err; + } else { # set "+kw", "+L", "-L", "-kw" + push @{$vmd_mod->{$op.$pfx}}, $val; + } + } else { + push @new_argv, $x; + } + } + @$argv = @new_argv; + $vmd_mod; +} + +sub eml_cb { # used by PublicInbox::LeiInput::input_fh + my ($self, $eml) = @_; + if (my $xoids = $self->{lei}->{ale}->xoids_for($eml)) { + $self->{lei}->{sto}->ipc_do('update_xvmd', $xoids, + $self->{vmd_mod}); + } else { + ++$self->{missing}; + } +} + +sub mbox_cb { eml_cb($_[1], $_[0]) } # used by PublicInbox::LeiInput::input_fh + +sub mark_done_wait { # dwaitpid callback + my ($arg, $pid) = @_; + my ($mark, $lei) = @$arg; + $lei->child_error($?, 'non-fatal errors during mark') if $?; + my $sto = delete $lei->{sto}; + my $wait = $sto->ipc_do('done') if $sto; # PublicInbox::LeiStore::done + $lei->dclose; +} + +sub mark_done { # EOF callback for main daemon + my ($lei) = @_; + my $mark = delete $lei->{mark} or return; + $mark->wq_wait_old(\&mark_done_wait, $lei); +} + +sub net_merge_complete { # callback used by LeiAuth + my ($self) = @_; + for my $input (@{$self->{inputs}}) { + $self->wq_io_do('mark_path_url', [], $input); + } + $self->wq_close(1); +} + +sub _mark_maildir { # maildir_each_eml cb + my ($f, $kw, $eml, $self) = @_; + eml_cb($self, $eml); +} + +sub _mark_net { # imap_each, nntp_each cb + my ($url, $uid, $kw, $eml, $self) = @_; + eml_cb($self, $eml) +} + +sub lei_mark { # the "lei mark" method + my ($lei, @argv) = @_; + my $sto = $lei->_lei_store(1); + my $self = $lei->{mark} = bless { missing => 0 }, __PACKAGE__; + $sto->write_prepare($lei); + $lei->ale; # refresh and prepare + my $vmd_mod = vmd_mod_extract(\@argv); + return $lei->fail(join("\n", @{$vmd_mod->{err}})) if $vmd_mod->{err}; + $self->prepare_inputs($lei, 
\@argv) or return; + grep(defined, @$vmd_mod{qw(+kw +L -L -kw)}) or + return $lei->fail('no keywords or labels specified'); + my $ops = { '' => [ \&mark_done, $lei ] }; + $lei->{auth}->op_merge($ops, $self) if $lei->{auth}; + $self->{vmd_mod} = $vmd_mod; + my $op = $lei->workers_start($self, 'lei_mark', 1, $ops); + $self->wq_io_do('mark_stdin', []) if $self->{0}; + net_merge_complete($self) unless $lei->{auth}; + while ($op && $op->{sock}) { $op->event_step } +} + +sub mark_path_url { + my ($self, $input) = @_; + my $lei = $self->{lei}; + my $ifmt = lc($lei->{opt}->{'in-format'} // ''); + # TODO auto-detect? + if ($input =~ m!\Aimaps?://!i) { + $lei->{net}->imap_each($input, \&_mark_net, $self); + return; + } elsif ($input =~ m!\A(?:nntps?|s?news)://!i) { + $lei->{net}->nntp_each($input, \&_mark_net, $self); + return; + } elsif ($input =~ s!\A([a-z0-9]+):!!i) { + $ifmt = lc $1; + } + if (-f $input) { + my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] : + PublicInbox::MboxLock->defaults); + my $mbl = PublicInbox::MboxLock->acq($input, 0, $m); + $self->input_fh($ifmt, $mbl->{fh}, $input); + } elsif (-d _ && (-d "$input/cur" || -d "$input/new")) { + return $lei->fail(<fail("$input unsupported (TODO)"); + } +} + +sub mark_stdin { + my ($self) = @_; + my $lei = $self->{lei}; + my $in = delete $self->{0}; + $self->input_fh($lei->{opt}->{'in-format'}, $in, ''); +} + +sub note_missing { + my ($self) = @_; + $self->{lei}->child_error(1 << 8) if $self->{missing}; +} + +sub ipc_atfork_child { + my ($self) = @_; + PublicInbox::LeiInput::input_only_atfork_child($self); + # this goes out-of-scope at worker process exit: + PublicInbox::OnDestroy->new($$, \&note_missing, $self); +} + +1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index b390b318..b5d43b7e 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -228,12 +228,30 @@ sub set_eml { set_eml_vmd($self, $eml, $vmd); } +sub update_xvmd { + my ($self, $xoids, $vmd_mod) 
= @_; + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + my %seen; + for my $oid (keys %$xoids) { + my @docids = $oidx->blob_exists($oid) or next; + scalar(@docids) > 1 and + warn "W: $oid indexed as multiple docids: @docids\n"; + for my $docid (@docids) { + next if $seen{$docid}++; + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('update_vmd', $docid, $vmd_mod); + } + } +} + # set or update keywords for external message, called via ipc_do sub set_xvmd { my ($self, $xoids, $eml, $vmd) = @_; my $eidx = eidx_init($self); my $oidx = $eidx->{oidx}; + my %seen; # see if we can just update existing docs for my $oid (keys %$xoids) { @@ -241,6 +259,7 @@ sub set_xvmd { scalar(@docids) > 1 and warn "W: $oid indexed as multiple docids: @docids\n"; for my $docid (@docids) { + next if $seen{$docid}++; my $idx = $eidx->idx_shard($docid); $idx->ipc_do('set_vmd', $docid, $vmd); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 3f933121..7d46489c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -597,6 +597,29 @@ sub remove_vmd { $self->{xdb}->replace_document($docid, $doc) if $replace; } +sub update_vmd { + my ($self, $docid, $vmd_mod) = @_; + begin_txn_lazy($self); + my $doc = _get_doc($self, $docid) or return; + my $updated = 0; + my @x = @VMD_MAP; + while (my ($field, $pfx) = splice(@x, 0, 2)) { + # field: "label" or "kw" + for my $val (@{$vmd_mod->{"-$field"} // []}) { + eval { + $doc->remove_term($pfx . $val); + ++$updated; + }; + } + for my $val (@{$vmd_mod->{"+$field"} // []}) { + $doc->add_boolean_term($pfx . 
$val); + ++$updated; + } + } + $self->{xdb}->replace_document($docid, $doc) if $updated; + $updated; +} + sub xdb_remove { my ($self, @docids) = @_; $self->begin_txn_lazy; diff --git a/t/lei-mark.t b/t/lei-mark.t new file mode 100644 index 00000000..ddf5634c --- /dev/null +++ b/t/lei-mark.t @@ -0,0 +1,47 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +require_git 2.6; +require_mods(qw(json DBD::SQLite Search::Xapian)); +my $check_kw = sub { + my ($exp, %opt) = @_; + my $mid = $opt{mid} // 'testmessage@example.com'; + lei_ok('q', "m:$mid"); + my $res = json_utf8->decode($lei_out); + is($res->[1], undef, 'only got one result'); + my $msg = $opt{msg} ? " $opt{msg}" : ''; + ($exp ? is_deeply($res->[0]->{kw}, $exp, "got @$exp$msg") + : is($res->[0]->{kw}, undef, "got undef$msg")) or + diag explain($res); +}; + +test_lei(sub { + lei_ok(qw(import -F eml t/utf8.eml)); + lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged)); + $check_kw->(['flagged']); + ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected'); + like($lei_err, qr/`seeen' is not one of/, 'got helpful error'); + ok(!lei(qw(mark -F eml t/utf8.eml +k:seen)), 'bad prefix rejected'); + ok(!lei(qw(mark -F eml t/utf8.eml)), 'no keywords'); + my $mb = "$ENV{HOME}/mb"; + my $md = "$ENV{HOME}/md"; + lei_ok(qw(q m:testmessage@example.com -o), "mboxrd:$mb"); + ok(-s $mb, 'wrote mbox result'); + lei_ok(qw(q m:testmessage@example.com -o), $md); + my @fn = glob("$md/cur/*"); + scalar(@fn) == 1 or BAIL_OUT 'no mail '.explain(\@fn); + rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; + $check_kw->(['flagged'], msg => 'after bad request'); + lei_ok(qw(mark -F eml t/utf8.eml -kw:flagged)); + $check_kw->(undef, msg => 'keyword cleared'); + lei_ok(qw(mark -F mboxrd +kw:seen), $mb); + $check_kw->(['seen'], msg => 'mbox Status ignored'); + lei_ok(qw(mark -kw:seen +kw:answered), $md); + $check_kw->(['answered'], msg => 'Maildir Status ignored'); 
+ + open my $in, '<', 't/utf8.eml' or BAIL_OUT $!; + lei_ok([qw(mark -F eml - +kw:seen)], undef, { %$lei_opt, 0 => $in }); + $check_kw->(['answered', 'seen'], msg => 'stdin works'); +}); +done_testing;