* [PATCH 1/3] watch: more details about full scan start/completion
2024-07-26 21:59 [PATCH 0/3] public-inbox-watch Maildir diagnostics Eric Wong
@ 2024-07-26 21:59 ` Eric Wong
2024-07-26 21:59 ` [PATCH 2/3] watch: only open one directory at a time when scanning Eric Wong
2024-07-26 21:59 ` [PATCH 3/3] watch: add per-directory scanning diagnostics Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-07-26 21:59 UTC (permalink / raw)
To: meta; +Cc: Robin H . Johnson
Start and stop happens infrequently and may be useful for
diagnosing problems about missing messages. A future change
will add more details about per-directory scans.
Requested-by: Robin H. Johnson <robbat2@orbis-terrarum.net>
---
lib/PublicInbox/Watch.pm | 3 ++-
script/public-inbox-watch | 3 ++-
t/watch_filter_rubylang.t | 2 +-
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index eb90d353..74cc599c 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -620,7 +620,8 @@ sub fs_scan_step {
}
_done_for_now($self);
# do we have more work to do?
- PublicInbox::DS::requeue($self) if keys %$opendirs;
+ keys(%$opendirs) ? PublicInbox::DS::requeue($self)
+ : warn("# full scan complete\n");
}
sub scan {
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
index 9bcd42ed..64300f09 100755
--- a/script/public-inbox-watch
+++ b/script/public-inbox-watch
@@ -40,7 +40,8 @@ my $reload = sub {
if ($watch) {
my $scan = sub {
return if !$watch;
- warn "# scanning\n";
+ my ($s) = @_;
+ warn "# scanning (full) ", ($s ? "on $s" : 'at startup'), "\n";
$watch->trigger_scan('full');
};
my $quit = sub { # may be called in IMAP/NNTP children
diff --git a/t/watch_filter_rubylang.t b/t/watch_filter_rubylang.t
index f72feb9f..81f6b00e 100644
--- a/t/watch_filter_rubylang.t
+++ b/t/watch_filter_rubylang.t
@@ -103,7 +103,7 @@ EOM
$ibx->{-no_fsync} = 1;
is($ibx->search->reopen->mset('b:spam')->size, 0, 'spam removed');
- is_deeply([], \@warn, 'no warnings');
+ is_deeply [], [ grep !/^#/, @warn ], 'no warnings';
}
done_testing();
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] watch: only open one directory at a time when scanning
2024-07-26 21:59 [PATCH 0/3] public-inbox-watch Maildir diagnostics Eric Wong
2024-07-26 21:59 ` [PATCH 1/3] watch: more details about full scan start/completion Eric Wong
@ 2024-07-26 21:59 ` Eric Wong
2024-07-26 21:59 ` [PATCH 3/3] watch: add per-directory scanning diagnostics Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-07-26 21:59 UTC (permalink / raw)
To: meta
This avoids EMFILE/ENFILE for large setups with many Maildir
watch directives. It also makes adding per-directory scanning
messages easier in the next commit.
---
lib/PublicInbox/Watch.pm | 56 ++++++++++++++++-----------------------
script/public-inbox-watch | 2 +-
2 files changed, 24 insertions(+), 34 deletions(-)
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index 74cc599c..f793d0d8 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -154,7 +154,7 @@ sub new {
imap_order => scalar(@imap) ? \@imap : undef,
nntp_order => scalar(@nntp) ? \@nntp: undef,
importers => {},
- opendirs => {}, # dirname => dirhandle (in progress scans)
+ scan_q => [], # [ [dirname,dirhandle] || dirname, ...]
ops => [], # 'quit', 'full'
}, $class;
}
@@ -289,7 +289,9 @@ sub quit_done ($) {
sub quit { # may be called in IMAP/NNTP children
my ($self) = @_;
$self->{quit} = 1;
- %{$self->{opendirs}} = ();
+ if (my $scan_q = $self->{scan_q}) {
+ @$scan_q = ();
+ }
_done_for_now($self);
quit_done($self);
if (my $dir_idle = delete $self->{dir_idle}) {
@@ -417,7 +419,7 @@ sub watch_imap_idle_1 ($$$) {
sub watch_atfork_child ($) {
my ($self) = @_;
- delete @$self{qw(dir_idle pids opendirs)};
+ delete @$self{qw(dir_idle pids scan_q)};
my $sig = delete $self->{sig};
$sig->{CHLD} = $sig->{HUP} = $sig->{USR1} = 'DEFAULT';
# TERM/QUIT/INT call ->quit, which works in both parent+child
@@ -578,56 +580,44 @@ sub watch { # main entry point
_done_for_now($self);
}
-sub trigger_scan {
- my ($self, $op) = @_;
- push @{$self->{ops}}, $op;
- PublicInbox::DS::requeue($self);
-}
-
sub fs_scan_step {
my ($self) = @_;
return if $self->{quit};
- my $op = shift @{$self->{ops}};
local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously
# continue existing scan
- my $opendirs = $self->{opendirs};
- my @dirnames = keys %$opendirs;
- foreach my $dir (@dirnames) {
- my $dh = delete $opendirs->{$dir};
+ while (defined(my $dir = shift @{$self->{scan_q}})) {
+ my $dh;
+ if (ref($dir) eq 'ARRAY') { # continue existing
+ ($dir, $dh) = @$dir;
+ } elsif (!opendir($dh, $dir)) {
+ warn "W: failed to open $dir: $! (non-fatal)\n";
+ next;
+ }
my $n = $self->{max_batch};
while (my $fn = readdir($dh)) {
_try_path($self, "$dir/$fn");
last if --$n < 0;
}
- $opendirs->{$dir} = $dh if $n < 0;
- }
- if ($op && $op eq 'full') {
- foreach my $dir (keys %{$self->{d_map}}) {
- next if $opendirs->{$dir}; # already in progress
- my $ok = opendir(my $dh, $dir);
- unless ($ok) {
- warn "failed to open $dir: $!\n";
- next;
- }
- my $n = $self->{max_batch};
- while (my $fn = readdir($dh)) {
- _try_path($self, "$dir/$fn");
- last if --$n < 0;
- }
- $opendirs->{$dir} = $dh if $n < 0;
+ if ($n < 0) {
+ unshift @{$self->{scan_q}}, [ $dir, $dh ];
+ last;
}
}
_done_for_now($self);
# do we have more work to do?
- keys(%$opendirs) ? PublicInbox::DS::requeue($self)
+ @{$self->{scan_q}} ? PublicInbox::DS::requeue($self)
: warn("# full scan complete\n");
}
sub scan {
my ($self, $op) = @_;
- push @{$self->{ops}}, $op;
- fs_scan_step($self);
+ if ($op eq 'full') {
+ push @{$self->{scan_q}}, keys %{$self->{d_map}};
+ fs_scan_step($self);
+ } else {
+ warn "BUG? unknown scan op: `$op' (non-fatal)\n";
+ }
}
sub _importer_for {
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
index 64300f09..80dd5e07 100755
--- a/script/public-inbox-watch
+++ b/script/public-inbox-watch
@@ -42,7 +42,7 @@ if ($watch) {
return if !$watch;
my ($s) = @_;
warn "# scanning (full) ", ($s ? "on $s" : 'at startup'), "\n";
- $watch->trigger_scan('full');
+ $watch->scan('full');
};
my $quit = sub { # may be called in IMAP/NNTP children
$watch->quit if $watch;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] watch: add per-directory scanning diagnostics
2024-07-26 21:59 [PATCH 0/3] public-inbox-watch Maildir diagnostics Eric Wong
2024-07-26 21:59 ` [PATCH 1/3] watch: more details about full scan start/completion Eric Wong
2024-07-26 21:59 ` [PATCH 2/3] watch: only open one directory at a time when scanning Eric Wong
@ 2024-07-26 21:59 ` Eric Wong
2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-07-26 21:59 UTC (permalink / raw)
To: meta; +Cc: Robin H . Johnson
This may help track down problems associated with a single
directory. Note we emit a separate message for each of the
`new' and `cur' subdirectories of a Maildir. Full scans only
happen at startup (or manually), so it shouldn't be too noisy
if logging to syslog.
Requested-by: Robin H. Johnson <robbat2@orbis-terrarum.net>
---
lib/PublicInbox/Watch.pm | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index f793d0d8..0520967f 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -590,7 +590,9 @@ sub fs_scan_step {
my $dh;
if (ref($dir) eq 'ARRAY') { # continue existing
($dir, $dh) = @$dir;
- } elsif (!opendir($dh, $dir)) {
+ } elsif (opendir($dh, $dir)) {
+ warn "# scanning $dir ...\n";
+ } else {
warn "W: failed to open $dir: $! (non-fatal)\n";
next;
}
@@ -602,6 +604,8 @@ sub fs_scan_step {
if ($n < 0) {
unshift @{$self->{scan_q}}, [ $dir, $dh ];
last;
+ } else {
+ warn "# done scanning $dir\n";
}
}
_done_for_now($self);
^ permalink raw reply related [flat|nested] 4+ messages in thread