From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DD1401F92E for ; Sat, 27 Jun 2020 10:04:04 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 25/34] watch: remove {mdir} array Date: Sat, 27 Jun 2020 10:03:51 +0000 Message-Id: <20200627100400.9871-26-e@yhbt.net> In-Reply-To: <20200627100400.9871-1-e@yhbt.net> References: <20200627100400.9871-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since we store all watched directory names as keys in %mdmap, there should be no need to keep an array of those directories around. t/watch_maildir*.t required changes to remove trained spam. Once we've trained something as spam, there shouldn't be a need to rescan it. --- lib/PublicInbox/WatchMaildir.pm | 22 ++++++++-------------- t/watch_maildir.t | 2 ++ t/watch_maildir_v2.t | 2 ++ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm index 621d41bd81d..8d2dc432684 100644 --- a/lib/PublicInbox/WatchMaildir.pm +++ b/lib/PublicInbox/WatchMaildir.pm @@ -40,8 +40,7 @@ sub compile_watchheaders ($) { sub new { my ($class, $config) = @_; - my (%mdmap, @mdir, $spamc); - my %uniq; # directory => count + my (%mdmap, $spamc); my %imap; # url => [inbox objects] or 'watchspam' # "publicinboxwatch" is the documented namespace @@ -54,10 +53,7 @@ sub new { for my $dir (@$dirs) { if (is_maildir($dir)) { # skip "new", no MUA has seen it, yet. - my $cur = "$dir/cur"; - push @mdir, $cur; - $uniq{$cur}++; - $mdmap{$cur} = 'watchspam'; + $mdmap{"$dir/cur"} = 'watchspam'; } elsif (my $url = imap_url($dir)) { $imap{$url} = 'watchspam'; } else { @@ -83,8 +79,6 @@ sub new { my ($new, $cur) = ("$watch/new", "$watch/cur"); my $cur_dst = $mdmap{$cur} //= []; return if is_watchspam($cur, $cur_dst, $ibx); - push @mdir, $new unless $uniq{$new}++; - push @mdir, $cur unless $uniq{$cur}++; push @{$mdmap{$new} //= []}, $ibx; push @$cur_dst, $ibx; } elsif (my $url = imap_url($watch)) { @@ -96,17 +90,16 @@ sub new { } } }); - return unless scalar(@mdir) || scalar(keys %imap); my $mdre; - if (@mdir) { - $mdre = join('|', map { quotemeta($_) } @mdir); + if (scalar keys %mdmap) { + $mdre = join('|', map { quotemeta($_) } keys %mdmap); $mdre = qr!\A($mdre)/!; } + return unless $mdre || scalar(keys %imap); bless { spamcheck => $spamcheck, mdmap => \%mdmap, - mdir => \@mdir, mdre => $mdre, config => $config, imap => scalar keys %imap ? \%imap : undef, @@ -231,7 +224,8 @@ sub watch_fs_init ($) { $self->{done_timer} //= PublicInbox::DS::requeue($done); }; require PublicInbox::DirIdle; - PublicInbox::DirIdle->new($self->{mdir}, $cb); # EPOLL_CTL_ADD + # inotify_create + EPOLL_CTL_ADD + PublicInbox::DirIdle->new([keys %{$self->{mdmap}}], $cb); } # returns the git config section name, e.g [imap "imaps://user@example.com"] @@ -688,7 +682,7 @@ sub fs_scan_step { $opendirs->{$dir} = $dh if $n < 0; } if ($op && $op eq 'full') { - foreach my $dir (@{$self->{mdir}}) { + foreach my $dir (keys %{$self->{mdmap}}) { next if $opendirs->{$dir}; # already in progress my $ok = opendir(my $dh, $dir); unless ($ok) { diff --git a/t/watch_maildir.t b/t/watch_maildir.t index c8658140cf2..c44273f0519 100644 --- a/t/watch_maildir.t +++ b/t/watch_maildir.t @@ -84,6 +84,7 @@ PublicInbox::WatchMaildir->new($config)->scan('full'); is(scalar @list, 2, 'two revisions in rev-list'); @list = $git->qx(qw(ls-tree -r --name-only refs/heads/master)); is(scalar @list, 0, 'tree is empty'); +is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { @@ -105,6 +106,7 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); is(scalar @list, 0, 'tree is empty'); @list = $git->qx(qw(rev-list refs/heads/master)); is(scalar @list, 4, 'four revisions in rev-list'); + is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } { diff --git a/t/watch_maildir_v2.t b/t/watch_maildir_v2.t index 6cc8b6ff0e9..f5b8e932985 100644 --- a/t/watch_maildir_v2.t +++ b/t/watch_maildir_v2.t @@ -71,6 +71,7 @@ $write_spam->(); is(unlink(glob("$maildir/new/*")), 1, 'unlinked old spam'); PublicInbox::WatchMaildir->new($config)->scan('full'); is(($srch->reopen->query(''))[0], 0, 'deleted file'); +is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); # check with scrubbing { @@ -90,6 +91,7 @@ More majordomo info at http://vger.kernel.org/majordomo-info.html\n); PublicInbox::WatchMaildir->new($config)->scan('full'); ($nr, $msgs) = $srch->reopen->query(''); is($nr, 0, 'inbox is empty again'); + is(unlink(glob("$spamdir/cur/*")), 1, 'unlinked trained spam'); } {