unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/7] implement purge tool
@ 2019-01-11  4:10 Eric Wong
  2019-01-11  4:10 ` [PATCH 1/7] hoist out resolve_repo_dir from -index Eric Wong
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

Of course, I found and fixed a bunch of little purge bugs
in the process :x

Still need to WTFM so I can tell others to RTFM :>

The following changes since commit b0e5062d43a96372801713ef78a78d6a1bc852bc:

  Merge commit 'mem' (2019-01-10 21:41:55 +0000)

are available in the Git repository at:

  https://public-inbox.org/ purge

for you to fetch changes up to 440b0feaa209e12e4bcb8ef16a95041fce71e7dc:

  implement public-inbox-purge tool (2019-01-11 04:07:17 +0000)

----------------------------------------------------------------
Eric Wong (7):
      hoist out resolve_repo_dir from -index
      import: purge: reap fast-export process
      v2writable: ->purge returns undef on no-op
      v2writable: purge ignores non-existent git epoch directories
      v2writable: cleanup processes when done
      v2writable: read epoch on purge
      implement public-inbox-purge tool

 MANIFEST                      |   4 ++
 lib/PublicInbox/Admin.pm      |  44 +++++++++++++++++
 lib/PublicInbox/Import.pm     |   3 +-
 lib/PublicInbox/V2Writable.pm |  16 ++++--
 script/public-inbox-index     |  32 +-----------
 script/public-inbox-purge     | 111 ++++++++++++++++++++++++++++++++++++++++++
 t/admin.t                     |  81 ++++++++++++++++++++++++++++++
 t/purge.t                     |  97 ++++++++++++++++++++++++++++++++++++
 t/v2writable.t                |   3 ++
 9 files changed, 357 insertions(+), 34 deletions(-)
 create mode 100644 lib/PublicInbox/Admin.pm
 create mode 100755 script/public-inbox-purge
 create mode 100644 t/admin.t
 create mode 100644 t/purge.t

-- 
EW

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/7] hoist out resolve_repo_dir from -index
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 2/7] import: purge: reap fast-export process Eric Wong
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

We'll be using it in future admin tools, and making this
easier-to-test.
---
 MANIFEST                  |  2 +
 lib/PublicInbox/Admin.pm  | 44 +++++++++++++++++++++
 script/public-inbox-index | 32 +---------------
 t/admin.t                 | 81 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 30 deletions(-)
 create mode 100644 lib/PublicInbox/Admin.pm
 create mode 100644 t/admin.t

diff --git a/MANIFEST b/MANIFEST
index e4f3df8..5ac85c3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ examples/unsubscribe.milter
 examples/unsubscribe.psgi
 examples/varnish-4.vcl
 lib/PublicInbox/Address.pm
+lib/PublicInbox/Admin.pm
 lib/PublicInbox/AltId.pm
 lib/PublicInbox/Config.pm
 lib/PublicInbox/ContentId.pm
@@ -141,6 +142,7 @@ scripts/slrnspool2maildir
 scripts/ssoma-replay
 scripts/xhdr-num2mid
 t/address.t
+t/admin.t
 t/altid.t
 t/altid_v2.t
 t/cgi.t
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
new file mode 100644
index 0000000..d0a8dd0
--- /dev/null
+++ b/lib/PublicInbox/Admin.pm
@@ -0,0 +1,44 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common stuff for administrative command-line tools
+# Unstable internal API
+package PublicInbox::Admin;
+use strict;
+use warnings;
+use Cwd 'abs_path';
+use base qw(Exporter);
+our @EXPORT_OK = qw(resolve_repo_dir);
+
+sub resolve_repo_dir {
+	my ($cd, $ver) = @_;
+	my $prefix = defined $cd ? $cd : './';
+	if (-d $prefix && -f "$prefix/inbox.lock") { # v2
+		$$ver = 2 if $ver;
+		return abs_path($prefix);
+	}
+
+	my @cmd = qw(git rev-parse --git-dir);
+	my $cmd = join(' ', @cmd);
+	my $pid = open my $fh, '-|';
+	defined $pid or die "forking $cmd failed: $!\n";
+	if ($pid == 0) {
+		if (defined $cd) {
+			chdir $cd or die "chdir $cd failed: $!\n";
+		}
+		exec @cmd;
+		die "Failed to exec $cmd: $!\n";
+	} else {
+		my $dir = eval {
+			local $/;
+			<$fh>;
+		};
+		close $fh or die "error in $cmd: $!\n";
+		chomp $dir;
+		$$ver = 1 if $ver;
+		return abs_path($cd) if ($dir eq '.' && defined $cd);
+		abs_path($dir);
+	}
+}
+
+1;
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 73ad9bc..32121f6 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -9,9 +9,10 @@
 use strict;
 use warnings;
 use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
-use Cwd 'abs_path';
 my $usage = "public-inbox-index REPO_DIR";
 use PublicInbox::Config;
+use PublicInbox::Admin qw(resolve_repo_dir);
+
 my $config = eval { PublicInbox::Config->new } || eval {
 	warn "public-inbox unconfigured for serving, indexing anyways...\n";
 	{}
@@ -35,35 +36,6 @@ die "--jobs must be positive\n" if defined $jobs && $jobs < 0;
 
 my @dirs;
 
-sub resolve_repo_dir {
-	my ($cd) = @_;
-	my $prefix = defined $cd ? $cd : './';
-	if (-d $prefix && -f "$prefix/inbox.lock") { # v2
-		return abs_path($prefix);
-	}
-
-	my @cmd = qw(git rev-parse --git-dir);
-	my $cmd = join(' ', @cmd);
-	my $pid = open my $fh, '-|';
-	defined $pid or die "forking $cmd failed: $!\n";
-	if ($pid == 0) {
-		if (defined $cd) {
-			chdir $cd or die "chdir $cd failed: $!\n";
-		}
-		exec @cmd;
-		die "Failed to exec $cmd: $!\n";
-	} else {
-		my $dir = eval {
-			local $/;
-			<$fh>;
-		};
-		close $fh or die "error in $cmd: $!\n";
-		chomp $dir;
-		return abs_path($cd) if ($dir eq '.' && defined $cd);
-		abs_path($dir);
-	}
-}
-
 if (@ARGV) {
 	@dirs = map { resolve_repo_dir($_) } @ARGV;
 } else {
diff --git a/t/admin.t b/t/admin.t
new file mode 100644
index 0000000..cc1e65d
--- /dev/null
+++ b/t/admin.t
@@ -0,0 +1,81 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw(tempdir);
+# use Cwd qw(getcwd);
+use_ok 'PublicInbox::Admin', qw(resolve_repo_dir);
+my $tmpdir = tempdir('pi-admin.XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $git_dir = "$tmpdir/v1";
+my $v2_dir = "$tmpdir/v2";
+my ($res, $err, $v);
+
+is(0, system(qw(git init -q --bare), $git_dir), 'git init v1');
+
+# v1
+is(resolve_repo_dir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
+is(resolve_repo_dir("$git_dir/objects"), $git_dir, 'GIT_DIR/objects resolved');
+
+ok(chdir($git_dir), 'chdir GIT_DIR works');
+is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR');
+
+ok(chdir("$git_dir/objects"), 'chdir GIT_DIR/objects works');
+is(resolve_repo_dir(), $git_dir, 'resolve_repo_dir works in GIT_DIR');
+$res = resolve_repo_dir(undef, \$v);
+is($v, 1, 'version 1 detected');
+is($res, $git_dir, 'detects directory along with version');
+
+# $tmpdir could be inside a git working directory, so we test '/'
+SKIP: {
+	my $no_vcs_dir = '/';
+	# do people version-control "/"?
+	skip "$no_vcs_dir is version controlled by git", 4 if -d '/.git';
+	open my $null, '>', '/dev/null' or die "open /dev/null: $!";
+	open my $olderr, '>&', \*STDERR or die "dup stderr: $!";
+
+	ok(chdir($no_vcs_dir), 'chdir to a non-inbox');
+	open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!";
+	$res = eval { resolve_repo_dir() };
+	open STDERR, '>&', $olderr or die "restore stderr: $!";
+	is($res, undef, 'fails inside non-version-controlled dir');
+
+	ok(chdir($tmpdir), 'back to test-specific $tmpdir');
+	open STDERR, '>&', $null or die "redirect stderr to /dev/null: $!";
+	$res = eval { resolve_repo_dir($no_vcs_dir) };
+	$err = $@;
+	open STDERR, '>&', $olderr or die "restore stderr: $!";
+	is($res, undef, 'fails on non-version-controlled dir');
+	ok($err, '$@ set on failure');
+}
+
+# v2
+SKIP: {
+	for my $m (qw(DBD::SQLite Search::Xapian)) {
+		skip "$m missing", 5 unless eval "require $m";
+	}
+	use_ok 'PublicInbox::V2Writable';
+	use_ok 'PublicInbox::Inbox';
+	my $ibx = PublicInbox::Inbox->new({
+			mainrepo => $v2_dir,
+			name => 'test-v2writable',
+			version => 2,
+			-primary_address => 'test@example.com',
+			indexlevel => 'basic',
+		});
+	PublicInbox::V2Writable->new($ibx, 1)->idx_init;
+
+	ok(-e "$v2_dir/inbox.lock", 'exists');
+	is(resolve_repo_dir($v2_dir), $v2_dir,
+		'resolve_repo_dir works on v2_dir');
+	ok(chdir($v2_dir), 'chdir v2_dir OK');
+	is(resolve_repo_dir(), $v2_dir, 'resolve_repo_dir works inside v2_dir');
+	$res = resolve_repo_dir(undef, \$v);
+	is($v, 2, 'version 2 detected');
+	is($res, $v2_dir, 'detects directory along with version');
+
+	# TODO: should work from inside Xapian dirs, and git dirs, here...
+}
+
+chdir '/';
+done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] import: purge: reap fast-export process
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
  2019-01-11  4:10 ` [PATCH 1/7] hoist out resolve_repo_dir from -index Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 3/7] v2writable: ->purge returns undef on no-op Eric Wong
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

Zombies are bad.
---
 lib/PublicInbox/Import.pm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index fd4255c..7e596ab 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -495,7 +495,7 @@ sub purge_oids {
 	my $old = $self->{'ref'};
 	my $git = $self->{git};
 	my @export = (qw(fast-export --no-data --use-done-feature), $old);
-	my ($rd, $pid) = $git->popen(@export);
+	my $rd = $git->popen(@export);
 	my ($r, $w) = $self->gfi_start;
 	my @buf;
 	my $npurge = 0;
@@ -550,6 +550,7 @@ sub purge_oids {
 			push @buf, $_;
 		}
 	}
+	close $rd or die "close fast-export failed: $?";
 	if (@buf) {
 		$w->print(@buf) or wfail;
 	}
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] v2writable: ->purge returns undef on no-op
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
  2019-01-11  4:10 ` [PATCH 1/7] hoist out resolve_repo_dir from -index Eric Wong
  2019-01-11  4:10 ` [PATCH 2/7] import: purge: reap fast-export process Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 4/7] v2writable: purge ignores non-existent git epoch directories Eric Wong
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

And doesn't try to access undef as an array ref.
---
 lib/PublicInbox/V2Writable.pm | 2 +-
 t/v2writable.t                | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 222df5c..08d18fc 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -390,7 +390,7 @@ sub purge {
 	my ($self, $mime) = @_;
 	my $purges = $self->{-inbox}->with_umask(sub {
 		remove_internal($self, $mime, undef, {});
-	});
+	}) or return;
 	$self->idx_init if @$purges; # ->done is called on purges
 	for my $i (0..$#$purges) {
 		defined(my $cmt = $purges->[$i]) or next;
diff --git a/t/v2writable.t b/t/v2writable.t
index ec9f56d..f171417 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -247,6 +247,9 @@ EOF
 	ok(my $cmts = $im->purge($mime), 'purged message');
 	like($cmts->[0], qr/\A[a-f0-9]{40}\z/, 'purge returned current commit');
 	$im->done;
+
+	# again
+	is($im->purge($mime), undef, 'no-op returns undef');
 }
 
 {
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] v2writable: purge ignores non-existent git epoch directories
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
                   ` (2 preceding siblings ...)
  2019-01-11  4:10 ` [PATCH 3/7] v2writable: ->purge returns undef on no-op Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 5/7] v2writable: cleanup processes when done Eric Wong
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

We don't require every git epoch to exist since we support
the --skip feature in public-inbox-init.
---
 lib/PublicInbox/V2Writable.pm | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 08d18fc..970244e 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -286,7 +286,9 @@ sub purge_oids {
 	my $pfx = "$self->{-inbox}->{mainrepo}/git";
 	my $purges = [];
 	foreach my $i (0..$self->{epoch_max}) {
-		my $git = PublicInbox::Git->new("$pfx/$i.git");
+		my $git_dir = "$pfx/$i.git";
+		-d $git_dir or next;
+		my $git = PublicInbox::Git->new($git_dir);
 		my $im = $self->import_init($git, 0, 1);
 		$purges->[$i] = $im->purge_oids($purge);
 	}
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] v2writable: cleanup processes when done
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
                   ` (3 preceding siblings ...)
  2019-01-11  4:10 ` [PATCH 4/7] v2writable: purge ignores non-existent git epoch directories Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 6/7] v2writable: read epoch on purge Eric Wong
  2019-01-11  4:10 ` [PATCH 7/7] implement public-inbox-purge tool Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

Otherwise, Perl may exit successfully when a failure code
is desired.
---
 lib/PublicInbox/V2Writable.pm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 970244e..ec28e51 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -291,6 +291,7 @@ sub purge_oids {
 		my $git = PublicInbox::Git->new($git_dir);
 		my $im = $self->import_init($git, 0, 1);
 		$purges->[$i] = $im->purge_oids($purge);
+		$im->done;
 	}
 	$purges;
 }
@@ -499,6 +500,7 @@ sub done {
 	delete $self->{bnote};
 	$self->{transact_bytes} = 0;
 	$self->lock_release if $parts;
+	$self->{-inbox}->git->cleanup;
 }
 
 sub git_init {
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] v2writable: read epoch on purge
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
                   ` (4 preceding siblings ...)
  2019-01-11  4:10 ` [PATCH 5/7] v2writable: cleanup processes when done Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  2019-01-11  4:10 ` [PATCH 7/7] implement public-inbox-purge tool Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

A stand-alone tool for purge won't know the epoch
if nothing was ->add()-ed before.
---
 lib/PublicInbox/V2Writable.pm | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index ec28e51..1f17fe2 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -285,7 +285,13 @@ sub purge_oids {
 	$self->done;
 	my $pfx = "$self->{-inbox}->{mainrepo}/git";
 	my $purges = [];
-	foreach my $i (0..$self->{epoch_max}) {
+	my $max = $self->{epoch_max};
+
+	unless (defined($max)) {
+		defined(my $latest = git_dir_latest($self, \$max)) or return;
+		$self->{epoch_max} = $max;
+	}
+	foreach my $i (0..$max) {
 		my $git_dir = "$pfx/$i.git";
 		-d $git_dir or next;
 		my $git = PublicInbox::Git->new($git_dir);
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] implement public-inbox-purge tool
  2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
                   ` (5 preceding siblings ...)
  2019-01-11  4:10 ` [PATCH 6/7] v2writable: read epoch on purge Eric Wong
@ 2019-01-11  4:10 ` Eric Wong
  6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-01-11  4:10 UTC (permalink / raw)
  To: meta

Expose the ->purge functionality of V2Writable for rewriting
git history to permanently purge messages from history.  This
may be necessary for legal reasons.

Usage:

	# requires ~/.public-inbox/config
	public-inbox-purge --all </path/to/message-to-purge

	# good for testing with unconfigured inboxes:
	public-inbox-purge $INBOX_DIR </path/to/message-to-purge
---
 MANIFEST                  |   2 +
 script/public-inbox-purge | 111 ++++++++++++++++++++++++++++++++++++++
 t/purge.t                 |  97 +++++++++++++++++++++++++++++++++
 3 files changed, 210 insertions(+)
 create mode 100755 script/public-inbox-purge
 create mode 100644 t/purge.t

diff --git a/MANIFEST b/MANIFEST
index 5ac85c3..886ae6b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -128,6 +128,7 @@ script/public-inbox-init
 script/public-inbox-learn
 script/public-inbox-mda
 script/public-inbox-nntpd
+script/public-inbox-purge
 script/public-inbox-watch
 script/public-inbox.cgi
 scripts/dc-dlvr
@@ -198,6 +199,7 @@ t/psgi_multipart_not.t
 t/psgi_search.t
 t/psgi_text.t
 t/psgi_v2.t
+t/purge.t
 t/qspawn.t
 t/reply.t
 t/search-thr-index.t
diff --git a/script/public-inbox-purge b/script/public-inbox-purge
new file mode 100755
index 0000000..688dd95
--- /dev/null
+++ b/script/public-inbox-purge
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Used for purging messages entirely from a public-inbox.  Currently
+# supports v2 inboxes only.
+use strict;
+use warnings;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev);
+use PublicInbox::Config;
+use PublicInbox::MIME;
+use PublicInbox::Admin qw(resolve_repo_dir);
+use PublicInbox::Filter::Base;
+*REJECT = *PublicInbox::Filter::Base::REJECT;
+
+my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message";
+
+eval { require PublicInbox::V2Writable } or die
+	"DBI, DBD::SQLite and Search::Xapian required for purge\n";
+my $config = eval { PublicInbox::Config->new };
+my $cfgfile = PublicInbox::Config::default_file();
+my ($all, $force);
+my $verbose = 1;
+my %opts = (
+	'all' => \$all,
+	'force|f' => \$force,
+	'verbose|v!' => \$verbose,
+);
+GetOptions(%opts) or die "bad command-line args\n", $usage, "\n";
+
+# TODO: clean this up and share code with -index via ::Admin
+my %dir2ibx; # ( path => Inbox object )
+my @inboxes;
+$config and $config->each_inbox(sub {
+	my ($ibx) = @_;
+	push @inboxes, $ibx if $all && $ibx->{version} != 1;
+	$dir2ibx{$ibx->{mainrepo}} = $ibx;
+});
+
+if ($all) {
+	$config or die "--all specified, but $cfgfile not readable\n";
+	@ARGV and die "--all specified, but directories specified\n";
+} else {
+	my @err;
+	my @dirs = scalar(@ARGV) ? @ARGV : ('.');
+	my $u = 0;
+
+	foreach my $dir (@dirs) {
+		my $v;
+		my $dir = resolve_repo_dir($dir, \$v);
+		if ($v == 1) {
+			push @err, $dir;
+			next;
+		}
+		my $ibx = $dir2ibx{$dir} ||= do {
+			warn "$dir not configured in $cfgfile\n";
+			$u++;
+			my $name = "unconfigured-$u";
+			PublicInbox::Inbox->new({
+				version => 2,
+				name => $name,
+				-primary_address => "$name\@example.com",
+				mainrepo => $dir,
+			});
+		};
+		push @inboxes, $ibx;
+	}
+
+	if (@err) {
+		die "v1 inboxes currently not supported by -purge\n\t",
+		    join("\n\t", @err), "\n";
+	}
+}
+
+my $data = do { local $/; scalar <STDIN> };
+$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+my $n_purged = 0;
+
+foreach my $ibx (@inboxes) {
+	my $mime = PublicInbox::MIME->new($data);
+	my $v2w = PublicInbox::V2Writable->new($ibx, 0);
+
+	my $commits = $v2w->purge($mime) || [];
+
+	if (my $scrub = $ibx->filter($v2w)) {
+		my $scrubbed = $scrub->scrub($mime, 1);
+
+		if ($scrubbed && $scrubbed != REJECT()) {
+			my $scrub_commits = $v2w->purge($scrubbed);
+			push @$commits, @$scrub_commits if $scrub_commits;
+		}
+	}
+
+	$v2w->done;
+
+	if ($verbose) { # should we consider this machine-parseable?
+		print "$ibx->{mainrepo}:";
+		if (scalar @$commits) {
+			print join("\n\t", '', @$commits), "\n";
+		} else {
+			print " NONE\n";
+		}
+	}
+	$n_purged += scalar @$commits;
+}
+
+# behave like "rm -f"
+exit(0) if ($force || $n_purged);
+
+warn "Not found\n" if $verbose;
+exit(1);
diff --git a/t/purge.t b/t/purge.t
new file mode 100644
index 0000000..9406005
--- /dev/null
+++ b/t/purge.t
@@ -0,0 +1,97 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+require './t/common.perl';
+require_git(2.6);
+my @mods = qw(IPC::Run DBI DBD::SQLite Search::Xapian);
+foreach my $mod (@mods) {
+	eval "require $mod";
+	plan skip_all => "missing $_ for t/purge.t" if $@;
+};
+use Cwd qw(abs_path);
+my $purge = abs_path('blib/script/public-inbox-purge');
+my $tmpdir = tempdir('pi-purge-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+use_ok 'PublicInbox::V2Writable';
+my $mainrepo = "$tmpdir/v2";
+my $ibx = PublicInbox::Inbox->new({
+	mainrepo => $mainrepo,
+	name => 'test-v2purge',
+	version => 2,
+	-primary_address => 'test@example.com',
+	indexlevel => 'basic',
+});
+
+my $raw = <<'EOF';
+From: a@example.com
+To: test@example.com
+Subject: this is a subject
+Message-ID: <a-mid@b>
+Date: Fri, 02 Oct 1993 00:00:00 +0000
+
+Hello World
+
+EOF
+
+local $ENV{NPROC} = '1';
+my $cfgfile = "$tmpdir/config";
+local $ENV{PI_CONFIG} = $cfgfile;
+open my $cfg_fh, '>', $cfgfile or die "open: $!";
+
+my $v2w = PublicInbox::V2Writable->new($ibx, 1);
+my $mime = PublicInbox::MIME->new($raw);
+ok($v2w->add($mime), 'add message to be purged');
+$v2w->done;
+
+# failing cases, first:
+my $in = "$raw\nMOAR\n";
+my ($out, $err) = ('', '');
+ok(IPC::Run::run([$purge, '-f', $mainrepo], \$in, \$out, \$err),
+	'purge -f OK');
+
+$out = $err = '';
+ok(!IPC::Run::run([$purge, $mainrepo], \$in, \$out, \$err),
+	'mismatch fails without -f');
+is($? >> 8, 1, 'missed purge exits with 1');
+
+# a successful case:
+ok(IPC::Run::run([$purge, $mainrepo], \$raw, \$out, \$err), 'match OK');
+like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+
+# add (old) vger filter to config file
+print $cfg_fh <<EOF or die "print $!";
+[publicinbox "test-v2purge"]
+	mainrepo = $mainrepo
+	address = test\@example.com
+	indexlevel = basic
+	filter = PublicInbox::Filter::Vger
+EOF
+close $cfg_fh or die "close: $!";
+
+ok($v2w->add($mime), 'add vger-signatured message to be purged');
+$v2w->done;
+
+my $pre_scrub = $raw . <<'EOF';
+
+--
+To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
+the body of a message to majordomo@vger.kernel.org
+More majordomo info at  http://vger.kernel.org/majordomo-info.html
+Please read the FAQ at  http://www.tux.org/lkml/
+EOF
+
+$out = $err = '';
+ok(chdir('/'), "chdir / OK for --all test");
+ok(IPC::Run::run([$purge, '--all'], \$pre_scrub, \$out, \$err),
+	'scrub purge OK');
+like($out, qr/^\t[a-f0-9]{40,}/m, 'removed commit noted');
+# diag "out: $out"; diag "err: $err";
+
+$out = $err = '';
+ok(!IPC::Run::run([$purge, '--all' ], \$pre_scrub, \$out, \$err),
+	'scrub purge not idempotent without -f');
+# diag "out: $out"; diag "err: $err";
+
+done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-01-11  4:10 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-11  4:10 [PATCH 0/7] implement purge tool Eric Wong
2019-01-11  4:10 ` [PATCH 1/7] hoist out resolve_repo_dir from -index Eric Wong
2019-01-11  4:10 ` [PATCH 2/7] import: purge: reap fast-export process Eric Wong
2019-01-11  4:10 ` [PATCH 3/7] v2writable: ->purge returns undef on no-op Eric Wong
2019-01-11  4:10 ` [PATCH 4/7] v2writable: purge ignores non-existent git epoch directories Eric Wong
2019-01-11  4:10 ` [PATCH 5/7] v2writable: cleanup processes when done Eric Wong
2019-01-11  4:10 ` [PATCH 6/7] v2writable: read epoch on purge Eric Wong
2019-01-11  4:10 ` [PATCH 7/7] implement public-inbox-purge tool Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).