unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] watch: don't count invalid paths against batch limit
Date: Thu, 26 Dec 2024 21:48:51 +0000	[thread overview]
Message-ID: <20241226214851.2078117-1-e@80x24.org> (raw)

Invalid paths such as `.', `..', `.mh_sequences', and perhaps
other implementation-specific files may throw off the count and
cause premature commits.  While the premature commit isn't too
harmful in the common case, it's possible a pathological case of
having too many non-mail entries in a directory can cause
noticeable slowdowns and storage wear.

So have _try_path() and _remove_spam() return a true value if
a file was actually read.  We'll also simplify the $inboxes
check to rely solely on `eq': the `ref' check is unnecessary
because an `eq' comparison against a ref will never match the
"watchspam" literal.
---
 lib/PublicInbox/Watch.pm | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 0520967f..5cde1d80 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -207,6 +207,7 @@ sub remove_eml_i { # each_inbox callback
 	}
 }
 
+# returns true if a file was read
 sub _remove_spam {
 	my ($self, $path) = @_;
 	# path must be marked as (S)een
@@ -214,6 +215,7 @@ sub _remove_spam {
 	my $eml = eml_from_path($path) or return;
 	local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
 	$self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
+	1;
 }
 
 sub import_eml ($$$) {
@@ -243,13 +245,18 @@ sub import_eml ($$$) {
 	}
 }
 
-sub _try_path {
+# returns true if a file was read
+sub _try_path ($$) {
 	my ($self, $path) = @_;
-	$path =~ $self->{d_re} or
-		return warn("BUG? unrecognized path: $path\n");
+	if ($path !~ $self->{d_re}) {
+		warn "BUG? unrecognized path: $path\n";
+		return;
+	}
 	my $dir = $1;
-	my $inboxes = $self->{d_map}->{$dir} //
-		return warn("W: unmappable dir: $dir\n");
+	my $inboxes = $self->{d_map}->{$dir} // do {
+		warn "W: unmappable dir: $dir\n";
+		return;
+	};
 	my ($md_fl, $mh_seq);
 	if ($self->{d_type}->{$dir} & D_MH) {
 		$path =~ m!/([0-9]+)\z! ? ($mh_seq = $1) : return;
@@ -267,13 +274,12 @@ sub _try_path {
 		my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
 		$warn_cb->($pfx, "path: $path\n", @_);
 	};
-	if (!ref($inboxes) && $inboxes eq 'watchspam') {
-		return _remove_spam($self, $path);
-	}
-	foreach my $ibx (@$inboxes) {
+	return _remove_spam($self, $path) if $inboxes eq 'watchspam';
+	for my $ibx (@$inboxes) {
 		my $eml = eml_from_path($path) or next;
-		import_eml($self, $ibx, $eml);
+		import_eml($self, $ibx, $eml); # $eml may be scrubbed
 	}
+	1;
 }
 
 sub quit_done ($) {
@@ -598,8 +604,7 @@ sub fs_scan_step {
 		}
 		my $n = $self->{max_batch};
 		while (my $fn = readdir($dh)) {
-			_try_path($self, "$dir/$fn");
-			last if --$n < 0;
+			last if _try_path($self, "$dir/$fn") and --$n < 0;
 		}
 		if ($n < 0) {
 			unshift @{$self->{scan_q}}, [ $dir, $dh ];

                 reply	other threads:[~2024-12-26 21:48 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241226214851.2078117-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).