From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A20381F5AE for ; Wed, 26 May 2021 23:50:07 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] lei rm: new command to remove messages from index Date: Wed, 26 May 2021 23:50:07 +0000 Message-Id: <20210526235007.3881-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This is similar to "public-inbox-learn rm", but it's possible to point an entire Maildir/IMAP/mbox*/newsgroup at it. --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 5 +++- lib/PublicInbox/LeiRm.pm | 50 +++++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiStore.pm | 29 ++++++++++++++++++++- t/lei-import-maildir.t | 7 ++++++ 5 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/LeiRm.pm diff --git a/MANIFEST b/MANIFEST index 23423e0b..0b4bb380 100644 --- a/MANIFEST +++ b/MANIFEST @@ -223,6 +223,7 @@ lib/PublicInbox/LeiP2q.pm lib/PublicInbox/LeiQuery.pm lib/PublicInbox/LeiRediff.pm lib/PublicInbox/LeiRemote.pm +lib/PublicInbox/LeiRm.pm lib/PublicInbox/LeiSavedSearch.pm lib/PublicInbox/LeiSearch.pm lib/PublicInbox/LeiStore.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 6ff249d0..7acc05bf 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -206,7 +206,10 @@ our %CMD = ( # sorted in order of importance/use: qw(verbose|v+), @c_opt ], 'edit-search' => [ 'OUTPUT', "edit saved search via `git config --edit'", @c_opt ], - +'rm' => [ '--stdin|LOCATION...', + 'remove a message from the index and prevent reindexing', + 'stdin|', # /|\z/ must be first for lone dash + @c_opt ], 'plonk' => [ '--threads|--from=IDENT', 'exclude mail matching From: or threads from non-Message-ID searches', qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ], diff --git a/lib/PublicInbox/LeiRm.pm b/lib/PublicInbox/LeiRm.pm new file mode 100644 index 00000000..185b6a15 --- /dev/null +++ b/lib/PublicInbox/LeiRm.pm @@ -0,0 +1,50 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# implements the "lei rm" command, you can point this at +# an entire spam mailbox or read a message from stdin +package PublicInbox::LeiRm; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC PublicInbox::LeiInput); + +sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh + my ($self, $eml) = @_; + $self->{lei}->{sto}->ipc_do('remove_eml', $eml); +} + +sub input_mbox_cb { # MboxReader callback + my ($eml, $self) = @_; + input_eml_cb($self, $eml); +} + +sub input_net_cb { # callback for ->imap_each, ->nntp_each + my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored + input_eml_cb($self, $eml); +} + +sub input_maildir_cb { + my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored + input_eml_cb($self, $eml); +} + +sub lei_rm { + my ($lei, @inputs) = @_; + $lei->_lei_store(1)->write_prepare($lei); + $lei->{opt}->{stdin} = 1 if !@inputs; + $lei->{opt}->{'in-format'} //= 'eml'; + my $self = bless { -wq_nr_workers => 1 }, __PACKAGE__; + $self->prepare_inputs($lei, \@inputs) or return; + my ($op_c, $ops) = $lei->workers_start($self, 1); + $lei->{wq1} = $self; + $lei->{-err_type} = 'non-fatal'; + net_merge_all_done($self) unless $lei->{auth}; + $op_c->op_wait_event($ops); +} + +no warnings 'once'; +*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; +*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; +*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all; + +1; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index af5edbc2..6888afb4 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -183,7 +183,7 @@ sub add_eml_vmd { \@docids; } -sub remove_eml_vmd { +sub remove_eml_vmd { # remove just the VMD my ($self, $eml, $vmd) = @_; my ($eidx, $tl) = eidx_init($self); my @docids = _docids_for($self, $eml); @@ -204,6 +204,33 @@ sub set_sync_info { })->set_src($oidhex, $folder, $id); } +sub _remove_if_local { # git->cat_async arg + my ($bref, $oidhex, $type, $size, $self) = @_; + $self->{im}->remove($bref) if $bref; +} + +# remove the entire message from the index, does not touch mail_sync.sqlite3 +sub remove_eml { + my ($self, $eml) = @_; + my $im = $self->importer; # may create new epoch + my ($eidx, $tl) = eidx_init($self); + my $oidx = $eidx->{oidx}; + my @docids = _docids_for($self, $eml); + my $git = $eidx->git; + for my $docid (@docids) { + my $xr3 = $oidx->get_xref3($docid, 1); + for my $row (@$xr3) { + my (undef, undef, $oidbin) = @$row; + my $oidhex = unpack('H*', $oidbin); + $git->cat_async($oidhex, \&_remove_if_local, $self); + } + $eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid); + $oidx->delete_by_num($docid); + } + $git->cat_async_wait; + \@docids; +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->{-fake_im} // $self->importer; # may create new epoch diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t index f813440a..688b10ce 100644 --- a/t/lei-import-maildir.t +++ b/t/lei-import-maildir.t @@ -68,5 +68,12 @@ test_lei(sub { $res = json_utf8->decode($lei_out); is_deeply($res, [ undef ], 'trashed message not imported') or diag explain($imp_err, $res); + + lei_ok qw(rm t/data/0001.patch); + lei_ok(qw(q s:boolean)); + is($lei_out, "[null]\n", 'removed message gone from results'); + my $g0 = "$ENV{HOME}/.local/share/lei/store/local/0.git"; + my $x = xqx(['git', "--git-dir=$g0", qw(cat-file blob HEAD:d)]); + is($?, 0, "git cat-file shows file is `d'"); }); done_testing;