unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] watch: don't count invalid paths against batch limit
@ 2024-12-26 21:48 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2024-12-26 21:48 UTC (permalink / raw)
  To: meta

Invalid paths such as `.', `..', `.mh_sequences', and perhaps
other implementation-specific files may throw off the count and
cause premature commits.  While the premature commit isn't too
harmful in the common case, it's possible a pathological case of
having too many non-mail entries in a directory can cause
noticeable slowdowns and storage wear.

So have _try_path() and _remove_spam() return a true value if
a file was actually read, and have fs_scan_step() decrement the
batch counter only when that value is true.  We'll also simplify
the $inboxes check by relying solely on `eq': the `ref' check
isn't necessary because `eq' against a ref will never match the
"watchspam" literal.
---
 lib/PublicInbox/Watch.pm | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 0520967f..5cde1d80 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -207,6 +207,7 @@ sub remove_eml_i { # each_inbox callback
 	}
 }
 
+# returns true if a file was read
 sub _remove_spam {
 	my ($self, $path) = @_;
 	# path must be marked as (S)een
@@ -214,6 +215,7 @@ sub _remove_spam {
 	my $eml = eml_from_path($path) or return;
 	local $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb();
 	$self->{pi_cfg}->each_inbox(\&remove_eml_i, $self, $eml, $path);
+	1;
 }
 
 sub import_eml ($$$) {
@@ -243,13 +245,18 @@ sub import_eml ($$$) {
 	}
 }
 
-sub _try_path {
+# returns true if a file was read
+sub _try_path ($$) {
 	my ($self, $path) = @_;
-	$path =~ $self->{d_re} or
-		return warn("BUG? unrecognized path: $path\n");
+	if ($path !~ $self->{d_re}) {
+		warn "BUG? unrecognized path: $path\n";
+		return;
+	}
 	my $dir = $1;
-	my $inboxes = $self->{d_map}->{$dir} //
-		return warn("W: unmappable dir: $dir\n");
+	my $inboxes = $self->{d_map}->{$dir} // do {
+		warn "W: unmappable dir: $dir\n";
+		return;
+	};
 	my ($md_fl, $mh_seq);
 	if ($self->{d_type}->{$dir} & D_MH) {
 		$path =~ m!/([0-9]+)\z! ? ($mh_seq = $1) : return;
@@ -267,13 +274,12 @@ sub _try_path {
 		my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : '';
 		$warn_cb->($pfx, "path: $path\n", @_);
 	};
-	if (!ref($inboxes) && $inboxes eq 'watchspam') {
-		return _remove_spam($self, $path);
-	}
-	foreach my $ibx (@$inboxes) {
+	return _remove_spam($self, $path) if $inboxes eq 'watchspam';
+	for my $ibx (@$inboxes) {
 		my $eml = eml_from_path($path) or next;
-		import_eml($self, $ibx, $eml);
+		import_eml($self, $ibx, $eml); # $eml may be scrubbed
 	}
+	1;
 }
 
 sub quit_done ($) {
@@ -598,8 +604,7 @@ sub fs_scan_step {
 		}
 		my $n = $self->{max_batch};
 		while (my $fn = readdir($dh)) {
-			_try_path($self, "$dir/$fn");
-			last if --$n < 0;
+			last if _try_path($self, "$dir/$fn") and --$n < 0;
 		}
 		if ($n < 0) {
 			unshift @{$self->{scan_q}}, [ $dir, $dh ];

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2024-12-26 21:48 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-12-26 21:48 [PATCH] watch: don't count invalid paths against batch limit Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).