From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1B2BD1F4C1; Fri, 25 Oct 2024 03:19:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1729826398; bh=+AGveQXWsu7VdtQi3JBcQJBBwt7bKMmeR7HYQcMv+G8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oPGFpvAhG8kSdbQx/qN1Z37FtmNJ6qTvInAK051pb9qxF+Uo4xZzdcsCpzJG5vpgu PmRCq9X4qAfUhlBmb0lsCc4b+uyEO0q31G3lWFNZKoKHqziD3ps0nbVPZoWJC98r59 BkqiWc6BtRbcr9tveCj3do5/F+7m/J5tWyeXDBB8= From: Eric Wong To: meta@public-inbox.org Cc: Konstantin Ryabitsev Subject: [PATCH 1/3] learn: support --keep-going/-k switch Date: Fri, 25 Oct 2024 03:19:55 +0000 Message-ID: <20241025031957.1305385-2-e@80x24.org> In-Reply-To: <20241025031957.1305385-1-e@80x24.org> References: <20241025031957.1305385-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Inspired by make(1), this switch allows -learn to work with config files which contain both read-only and read-write inboxes. The remove_or_add() sub is now anonymous and locally-scoped to facilitate `@fail_ibx' variable sharing use when the entire script is made into a subroutine for `make check-run' (via TestCommon->key2sub). A subsequent commit will reduce needless parameter passing of global variables for readability. Requested-by: Konstantin Ryabitsev Link: https://public-inbox.org/meta/20241024-nimble-honest-stallion-dad849@lemur/ --- Documentation/public-inbox-learn.pod | 25 +++++++- MANIFEST | 1 + script/public-inbox-learn | 95 ++++++++++++++++------------ t/learn.t | 85 +++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 42 deletions(-) create mode 100644 t/learn.t diff --git a/Documentation/public-inbox-learn.pod b/Documentation/public-inbox-learn.pod index b08e4bc8..ccb967fa 100644 --- a/Documentation/public-inbox-learn.pod +++ b/Documentation/public-inbox-learn.pod @@ -54,9 +54,28 @@ This is similar to the C command above, but does not feed the message to L and only removes messages which match on any of the C, C, and C headers. -The C<--all> option may be used to match C semantics in removing -the message from all configured inboxes. C<--all> is only -available in public-inbox 1.6.0+. +=back + +=head1 OPTIONS + +=over + +=item --all + +C<--all> may be used with C to match C semantics in +removing a message from all configured inboxes. C<--all> is +only available in public-inbox 1.6.0+. + +=item --keep-going + +=item -k + +Continue to other inboxes after errors. This ensures C +and C can succeed on all writable inboxes and act +as a no-op on read-only inboxes. + +Useful for dealing with cases where some inboxes in PI_CONFIG +are read-only. =back diff --git a/MANIFEST b/MANIFEST index 3c4cefb4..6b759487 100644 --- a/MANIFEST +++ b/MANIFEST @@ -507,6 +507,7 @@ t/io.t t/ipc.t t/iso-2202-jp.eml t/kqnotify.t +t/learn.t t/lei-auto-watch.t t/lei-convert.t t/lei-daemon.t diff --git a/script/public-inbox-learn b/script/public-inbox-learn index a955cdf6..dae4400f 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2014-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # Used for training spam (via SpamAssassin) and removing messages from a @@ -16,6 +16,7 @@ required action argument: options: --all scan all inboxes on `rm' + -k keep going after errors (for read-only inboxes) See public-inbox-learn(1) man page for full documentation. EOF @@ -27,7 +28,7 @@ use PublicInbox::Address; use PublicInbox::Spamcheck::Spamc; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my %opt = (all => 0); -GetOptions(\%opt, qw(all help|h)) or die $help; +GetOptions(\%opt, qw(all keep-going|k help|h)) or die $help; use PublicInbox::Import; my $train = shift or die $help; @@ -40,7 +41,7 @@ my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; local $PublicInbox::Import::DROP_UNIQUE_UNSUB; PublicInbox::Import::load_config($pi_cfg); -my $err; +my ($err, @fail_ibx); my $mime = PublicInbox::Eml->new(do{ my $data = PublicInbox::IO::read_all \*STDIN; PublicInbox::Eml::strip_from($data); @@ -52,51 +53,66 @@ my $mime = PublicInbox::Eml->new(do{ } elsif ($train eq 'spam') { $spamc->spamlearn(\$data); } - die "spamc failed with: $?\n" if $?; + die "E: spamc failed with: $?\n" if $?; }; $err = $@; } \$data }); -sub remove_or_add ($$$$) { +my $ibx_fail = sub { + my ($ibx) = @_; + my $m = "E: $@ ($ibx->{inboxdir})\n"; + die $m if !$opt{'keep-going'}; + warn $m; + push @fail_ibx, $ibx; +}; + +my $remove_or_add = sub { my ($ibx, $train, $mime, $addr) = @_; + eval { + # We do not touch GIT_COMMITTER_* env here so we can track + # who trained the message. + $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; + $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; + $ibx = PublicInbox::InboxWritable->new($ibx); + $ibx->{indexlevel} = $ibx->detect_indexlevel; + my $im = $ibx->importer(0); - # We do not touch GIT_COMMITTER_* env here so we can track - # who trained the message. - $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; - $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; - $ibx = PublicInbox::InboxWritable->new($ibx); - $ibx->{indexlevel} = $ibx->detect_indexlevel; - my $im = $ibx->importer(0); - - if ($train eq "rm") { - # This needs to be idempotent, as my inotify trainer - # may train for each cross-posted message, and this - # script already learns for every list in - # ~/.public-inbox/config - $im->remove($mime, $train); - } elsif ($train eq "ham") { - # no checking for spam here, we assume the message has - # been reviewed by a human at this point: - PublicInbox::MDA->set_list_headers($mime, $ibx); - - # Ham messages are trained when they're marked into - # a SEEN state, so this is idempotent: - $im->add($mime); - } - $im->done; -} + if ($train eq "rm") { + # This needs to be idempotent, as my inotify trainer + # may train for each cross-posted message, and this + # script already learns for every list in + # ~/.public-inbox/config + $im->remove($mime, $train); + } elsif ($train eq "ham") { + # no checking for spam here, we assume the message has + # been reviewed by a human at this point: + PublicInbox::MDA->set_list_headers($mime, $ibx); + + # Ham messages are trained when they're marked into + # a SEEN state, so this is idempotent: + $im->add($mime); + } + $im->done; + }; + $ibx_fail->($ibx) if $@; +}; -# spam is removed from all known inboxes since it is often Bcc:-ed -if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { - $pi_cfg->each_inbox(sub { - my ($ibx) = @_; +my $remove_all = sub { # each_inbox cb + my ($ibx) = @_; + eval { $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); $im->remove($mime, $train); $im->done; - }); + }; + $ibx_fail->($ibx) if $@; +}; + +# spam is removed from all known inboxes since it is often Bcc:-ed +if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { + $pi_cfg->each_inbox($remove_all); } else { require PublicInbox::MDA; @@ -114,16 +130,17 @@ if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 next if $seen{0 + $ibx}++; - remove_or_add($ibx, $train, $mime, $addr); + $remove_or_add->($ibx, $train, $mime, $addr); } my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime); for my $ibx (@$dests) { next if $seen{0 + $ibx}++; - remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); + $remove_or_add->($ibx, $train, $mime, $ibx->{-primary_address}); } } -if ($err) { - warn $err; +if ($err || @fail_ibx) { + warn $err if $err; + warn 'E: ', scalar(@fail_ibx), " inbox(es) failed\n"; exit 1; } diff --git a/t/learn.t b/t/learn.t new file mode 100644 index 00000000..0c848459 --- /dev/null +++ b/t/learn.t @@ -0,0 +1,85 @@ +# Copyright (C) all contributors +# License: AGPL-3.0+ +use v5.12; +use autodie; +use PublicInbox::TestCommon; +plan skip_all => "cannot test $0 as root" if $> == 0; +require_mods qw(DBD::SQLite); +my ($v2, $v2ro, @v2cfg); +my $tmpdir = tmpdir; +my $eml = eml_load 't/plack-qp.eml'; +my $v1 = create_inbox 'v1', indexlevel => 'basic', tmpdir => "$tmpdir/v1", sub { + my ($im) = @_; + $im->add($eml); +}; + +my $v1ro = create_inbox 'v1ro', indexlevel => 'basic', + tmpdir => "$tmpdir/v1ro", sub { + my ($im) = @_; + $im->add($eml); +}; +chmod 0500, $v1ro->{inboxdir}; +chmod 0400, glob("$v1ro->{inboxdir}/public-inbox/xapian*/over.sqlite3"), + glob("$v1ro->{inboxdir}/public-inbox/msgmap.sqlite3"); + +SKIP: { + require_git v2.6, 1; + $v2 = create_inbox 'v2', indexlevel => 'basic', version => 2, + tmpdir => "$tmpdir/v2", sub { + my ($im, $ibx) = @_; + $im->add($eml); + }; + $v2ro = create_inbox 'v2', indexlevel => 'basic', version => 2, + tmpdir => "$tmpdir/v2ro", sub { + my ($im, $ibx) = @_; + $im->add($eml); + }; + chmod 0500, $v2ro->{inboxdir}, "$v2ro->{inboxdir}/git/0.git"; + chmod 0400, glob("$v2ro->{inboxdir}/xap*/over.sqlite3"), + glob("$v2ro->{inboxdir}/msgmap.sqlite3"); + @v2cfg = (<{inboxdir}; + address = v2ro\@example.com + indexlevel = basic +[publicinbox "v2"] + inboxdir = $v2->{inboxdir}; + address = v2\@example.com + indexlevel = basic +EOM +} + +my $cfg = cfg_new $tmpdir, <{inboxdir} + address = v1ro\@example.com + indexlevel = basic +[publicinbox "v1"] + inboxdir = $v1->{inboxdir} + address = v1\@example.com + indexlevel = basic +EOM + +my $opt = { + 0 => \($eml->as_string), + 1 => \(my $out), + 2 => \(my $err), +}; +my $env = { PI_CONFIG => $cfg->{-f} }; + +run_script [ qw(-learn rm --all -k) ], $env, $opt; +isnt $?, 0, 'learn $? is non-zero'; +is 0, $v1->over->max, 'removed from r/w v1'; +is 1, $v1ro->over->max, 'not removed from r/o v1'; +my $nr = 1; +SKIP: { + require_git v2.6, 1; + is 0, $v2->over->max, 'removed from r/w v2'; + is 1, $v2ro->over->max, 'not removed from r/o v2'; + $nr = 2; +} + +like $err, qr/E: $nr inbox\(es\) failed/, 'failures noted in stderr'; +is $out, '', 'stdout is empty'; + +done_testing;