From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 933BE1F4CC for ; Thu, 26 Dec 2024 21:48:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1735249731; bh=z8Ehm/QX9XXzU6sfEyRXXyelR4XxP0r1dy0N9xYMGWI=; h=From:To:Subject:Date:From; b=r9Z7fIr1N70cyjwTdYyY6eKnLfVaXHSFS//+qNs7blf2kjWoKmojZhY4A7PKrmplf r/xe8FdM2QyZ9irqFhEHism5IKS8ISfmCKh7vPfGxFZ92vyhzKPnbW4KKjToXmo6UN 2DztnLaY31yfPAKRgPBxwrOjapRZwyLOM/XSfoic= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] watch: don't count invalid paths against batch limit Date: Thu, 26 Dec 2024 21:48:51 +0000 Message-ID: <20241226214851.2078117-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Invalid paths such as `.', `..', `.mh_sequences', and perhaps other implementation-specific files may throw off the count and cause premature commits. While the premature commit isn't too harmful in the common case, it's possible a pathological case of having too many non-mail entries in a directory can cause noticeable slowdowns and storage wear. So have _try_path() and _remove_spam() return a true value if a file was actually read. We'll also simplify the $inboxes check by relying simply on `eq', since the `ref' check isn't necessary as the `eq' against a ref will never match the "watchspam" literal. 
--- lib/PublicInbox/Watch.pm | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index 0520967f..5cde1d80 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -207,6 +207,7 @@ sub remove_eml_i { # each_inbox callback } } +# returns true if a file was read sub _remove_spam { my ($self, $path) = @_; # path must be marked as (S)een @@ -214,6 +215,7 @@ sub _remove_spam { my $eml = eml_from_path($path) or return; local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); $self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path); + 1; } sub import_eml ($$$) { @@ -243,13 +245,18 @@ sub import_eml ($$$) { } } -sub _try_path { +# returns true if a file was read +sub _try_path ($$) { my ($self, $path) = @_; - $path =~ $self->{d_re} or - return warn("BUG? unrecognized path: $path\n"); + if ($path !~ $self->{d_re}) { + warn "BUG? unrecognized path: $path\n"; + return; + } my $dir = $1; - my $inboxes = $self->{d_map}->{$dir} // - return warn("W: unmappable dir: $dir\n"); + my $inboxes = $self->{d_map}->{$dir} // do { + warn "W: unmappable dir: $dir\n"; + return; + }; my ($md_fl, $mh_seq); if ($self->{d_type}->{$dir} & D_MH) { $path =~ m!/([0-9]+)\z! ? ($mh_seq = $1) : return; @@ -267,13 +274,12 @@ sub _try_path { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? 
$1 : ''; $warn_cb->($pfx, "path: $path\n", @_); }; - if (!ref($inboxes) && $inboxes eq 'watchspam') { - return _remove_spam($self, $path); - } - foreach my $ibx (@$inboxes) { + return _remove_spam($self, $path) if $inboxes eq 'watchspam'; + for my $ibx (@$inboxes) { my $eml = eml_from_path($path) or next; - import_eml($self, $ibx, $eml); + import_eml($self, $ibx, $eml); # $eml may be scrubbed } + 1; } sub quit_done ($) { @@ -598,8 +604,7 @@ sub fs_scan_step { } my $n = $self->{max_batch}; while (my $fn = readdir($dh)) { - _try_path($self, "$dir/$fn"); - last if --$n < 0; + last if _try_path($self, "$dir/$fn") and --$n < 0; } if ($n < 0) { unshift @{$self->{scan_q}}, [ $dir, $dh ];