unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] lei rm: new command to remove messages from index
@ 2021-05-26 23:50 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-05-26 23:50 UTC (permalink / raw)
  To: meta

This is similar to "public-inbox-learn rm", but it's
possible to point an entire Maildir/IMAP/mbox*/newsgroup
at it.
---
 MANIFEST                    |  1 +
 lib/PublicInbox/LEI.pm      |  5 +++-
 lib/PublicInbox/LeiRm.pm    | 50 +++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiStore.pm | 29 ++++++++++++++++++++-
 t/lei-import-maildir.t      |  7 ++++++
 5 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/LeiRm.pm

diff --git a/MANIFEST b/MANIFEST
index 23423e0b..0b4bb380 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -223,6 +223,7 @@ lib/PublicInbox/LeiP2q.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRemote.pm
+lib/PublicInbox/LeiRm.pm
 lib/PublicInbox/LeiSavedSearch.pm
 lib/PublicInbox/LeiSearch.pm
 lib/PublicInbox/LeiStore.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 6ff249d0..7acc05bf 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -206,7 +206,10 @@ our %CMD = ( # sorted in order of importance/use:
 		qw(verbose|v+), @c_opt ],
 'edit-search' => [ 'OUTPUT', "edit saved search via `git config --edit'",
 			@c_opt ],
-
+'rm' => [ '--stdin|LOCATION...',
+	'remove a message from the index and prevent reindexing',
+	'stdin|', # /|\z/ must be first for lone dash
+	@c_opt ],
 'plonk' => [ '--threads|--from=IDENT',
 	'exclude mail matching From: or threads from non-Message-ID searches',
 	qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
diff --git a/lib/PublicInbox/LeiRm.pm b/lib/PublicInbox/LeiRm.pm
new file mode 100644
index 00000000..185b6a15
--- /dev/null
+++ b/lib/PublicInbox/LeiRm.pm
@@ -0,0 +1,50 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# implements the "lei rm" command; you can point this at
+# an entire spam mailbox or read a message from stdin
+package PublicInbox::LeiRm;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+
+sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
+	my ($self, $eml) = @_; # $eml: the message to remove
+	$self->{lei}->{sto}->ipc_do('remove_eml', $eml); # hand $eml to the store's remove_eml
+}
+
+sub input_mbox_cb { # MboxReader callback
+	my ($eml, $self) = @_; # note the order: $eml first, $self second
+	input_eml_cb($self, $eml); # same removal path as every other input type
+}
+
+sub input_net_cb { # callback for ->imap_each, ->nntp_each
+	my (undef, undef, $kw, $eml, $self) = @_; # @_[0,1]: url + uid ignored; $kw is unused here
+	input_eml_cb($self, $eml); # same removal path as every other input type
+}
+
+sub input_maildir_cb { # Maildir callback; only $eml and $self are used
+	my (undef, $kw, $eml, $self) = @_; # $_[0] $filename ignored; $kw is unused here
+	input_eml_cb($self, $eml); # same removal path as every other input type
+}
+
+sub lei_rm { # entry point for the "lei rm" command
+	my ($lei, @inputs) = @_; # @inputs: locations to read messages from, may be empty
+	$lei->_lei_store(1)->write_prepare($lei); # NOTE(review): arg 1 presumably means "create if missing" - confirm
+	$lei->{opt}->{stdin} = 1 if !@inputs; # no inputs given: fall back to stdin
+	$lei->{opt}->{'in-format'} //= 'eml'; # 'eml' unless a format was already set
+	my $self = bless { -wq_nr_workers => 1 }, __PACKAGE__; # one worker requested
+	$self->prepare_inputs($lei, \@inputs) or return; # give up if prepare_inputs returns false
+	my ($op_c, $ops) = $lei->workers_start($self, 1);
+	$lei->{wq1} = $self;
+	$lei->{-err_type} = 'non-fatal';
+	net_merge_all_done($self) unless $lei->{auth}; # called directly when $lei->{auth} is unset
+	$op_c->op_wait_event($ops);
+}
+
+no warnings 'once'; # the glob names assigned below appear only once in this file
+*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; # reuse LeiInput's implementation
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; # called from lei_rm
+*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all; # reuse LeiAuth's implementation
+
+1;
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index af5edbc2..6888afb4 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -183,7 +183,7 @@ sub add_eml_vmd {
 	\@docids;
 }
 
-sub remove_eml_vmd {
+sub remove_eml_vmd { # remove just the VMD
 	my ($self, $eml, $vmd) = @_;
 	my ($eidx, $tl) = eidx_init($self);
 	my @docids = _docids_for($self, $eml);
@@ -204,6 +204,33 @@ sub set_sync_info {
 	})->set_src($oidhex, $folder, $id);
 }
 
+sub _remove_if_local { # git->cat_async arg
+	my ($bref, $oidhex, $type, $size, $self) = @_; # only $bref and $self are used
+	$self->{im}->remove($bref) if $bref; # no-op when $bref is false (presumably blob not found - confirm)
+}
+
+# remove the entire message from the index, does not touch mail_sync.sqlite3
+sub remove_eml { # returns an arrayref of the docids found for $eml
+	my ($self, $eml) = @_;
+	my $im = $self->importer; # may create new epoch; NOTE(review): $im is unused below, presumably sets $self->{im} - confirm
+	my ($eidx, $tl) = eidx_init($self); # $tl is not referenced again in this sub
+	my $oidx = $eidx->{oidx};
+	my @docids = _docids_for($self, $eml);
+	my $git = $eidx->git;
+	for my $docid (@docids) {
+		my $xr3 = $oidx->get_xref3($docid, 1); # arrayref of rows, see loop below
+		for my $row (@$xr3) {
+			my (undef, undef, $oidbin) = @$row; # only the 3rd column is needed
+			my $oidhex = unpack('H*', $oidbin); # binary OID -> hex string
+			$git->cat_async($oidhex, \&_remove_if_local, $self); # $self arrives as the callback's last arg
+		}
+		$eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid); # ask the shard for $docid to run xdb_remove
+		$oidx->delete_by_num($docid);
+	}
+	$git->cat_async_wait; # presumably waits for the queued cat_async callbacks - confirm
+	\@docids; # includes docids even if no blob was removed via $self->{im}
+}
+
 sub add_eml {
 	my ($self, $eml, $vmd, $xoids) = @_;
 	my $im = $self->{-fake_im} // $self->importer; # may create new epoch
diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t
index f813440a..688b10ce 100644
--- a/t/lei-import-maildir.t
+++ b/t/lei-import-maildir.t
@@ -68,5 +68,12 @@ test_lei(sub {
 	$res = json_utf8->decode($lei_out);
 	is_deeply($res, [ undef ], 'trashed message not imported')
 			or diag explain($imp_err, $res);
+
+	lei_ok qw(rm t/data/0001.patch);
+	lei_ok(qw(q s:boolean));
+	is($lei_out, "[null]\n", 'removed message gone from results');
+	my $g0 = "$ENV{HOME}/.local/share/lei/store/local/0.git";
+	my $x = xqx(['git', "--git-dir=$g0", qw(cat-file blob HEAD:d)]);
+	is($?, 0, "git cat-file shows file is `d'");
 });
 done_testing;

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-26 23:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-26 23:50 [PATCH] lei rm: new command to remove messages from index Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).