unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] dir_idle: detect files which are gone
Date: Fri, 14 May 2021 07:27:57 +0000	[thread overview]
Message-ID: <20210514072757.24883-1-e@80x24.org> (raw)

lei now makes use of this to clean up after unlinked sockets
with less delay.  This will also be used to maintain
mail_sync.sqlite3.
---
 lib/PublicInbox/DirIdle.pm     | 11 ++++--
 lib/PublicInbox/FakeInotify.pm | 63 +++++++++++++++++++++++++++++-----
 lib/PublicInbox/KQNotify.pm    | 12 ++++---
 lib/PublicInbox/LEI.pm         | 17 +++++----
 t/dir_idle.t                   | 18 +++++++++-
 t/fake_inotify.t               | 21 +++++++++---
 6 files changed, 115 insertions(+), 27 deletions(-)

diff --git a/lib/PublicInbox/DirIdle.pm b/lib/PublicInbox/DirIdle.pm
index 5437190d..e53fd9d1 100644
--- a/lib/PublicInbox/DirIdle.pm
+++ b/lib/PublicInbox/DirIdle.pm
@@ -8,22 +8,25 @@ use parent 'PublicInbox::DS';
 use PublicInbox::Syscall qw(EPOLLIN EPOLLET);
 use PublicInbox::In2Tie;
 
-my ($MAIL_IN, $ino_cls);
+my ($MAIL_IN, $MAIL_GONE, $ino_cls);
 if ($^O eq 'linux' && eval { require Linux::Inotify2; 1 }) {
 	$MAIL_IN = Linux::Inotify2::IN_MOVED_TO() |
 		Linux::Inotify2::IN_CREATE();
+	$MAIL_GONE = Linux::Inotify2::IN_DELETE();
 	$ino_cls = 'Linux::Inotify2';
 # Perl 5.22+ is needed for fileno(DIRHANDLE) support:
 } elsif ($^V ge v5.22 && eval { require PublicInbox::KQNotify }) {
 	$MAIL_IN = PublicInbox::KQNotify::MOVED_TO_OR_CREATE();
+	$MAIL_GONE = PublicInbox::KQNotify::NOTE_DELETE();
 	$ino_cls = 'PublicInbox::KQNotify';
 } else {
 	require PublicInbox::FakeInotify;
 	$MAIL_IN = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE();
+	$MAIL_GONE = PublicInbox::FakeInotify::IN_DELETE();
 }
 
 sub new {
-	my ($class, $dirs, $cb) = @_;
+	my ($class, $dirs, $cb, $gone) = @_;
 	my $self = bless { cb => $cb }, $class;
 	my $inot;
 	if ($ino_cls) {
@@ -36,7 +39,9 @@ sub new {
 	}
 
 	# Linux::Inotify2->watch or similar
-	$inot->watch($_, $MAIL_IN) for @$dirs;
+	my $fl = $MAIL_IN;
+	$fl |= $MAIL_GONE if $gone;
+	$inot->watch($_, $fl) for @$dirs;
 	$self->{inot} = $inot;
 	PublicInbox::FakeInotify::poll_once($self) if !$ino_cls;
 	$self;
diff --git a/lib/PublicInbox/FakeInotify.pm b/lib/PublicInbox/FakeInotify.pm
index 25818e07..644f5b5b 100644
--- a/lib/PublicInbox/FakeInotify.pm
+++ b/lib/PublicInbox/FakeInotify.pm
@@ -5,16 +5,29 @@
 # enough of Linux::Inotify2
 package PublicInbox::FakeInotify;
 use strict;
+use v5.10.1;
+use parent qw(Exporter);
 use Time::HiRes qw(stat);
 use PublicInbox::DS qw(add_timer);
 sub IN_MODIFY () { 0x02 } # match Linux inotify
 # my $IN_MOVED_TO = 0x80;
 # my $IN_CREATE = 0x100;
 sub MOVED_TO_OR_CREATE () { 0x80 | 0x100 }
+sub IN_DELETE () { 0x00000200 }
+
+our @EXPORT_OK = qw(fill_dirlist on_dir_change);
 
 my $poll_intvl = 2; # same as Filesys::Notify::Simple
 
-sub new { bless { watch => {} }, __PACKAGE__ }
+sub new { bless { watch => {}, dirlist => {} }, __PACKAGE__ }
+
+sub fill_dirlist ($$$) {
+	my ($self, $path, $dh) = @_;
+	my $dirlist = $self->{dirlist}->{$path} = {};
+	while (defined(my $n = readdir($dh))) {
+		$dirlist->{$n} = undef if $n !~ /\A\.\.?\z/;
+	}
+}
 
 # behaves like Linux::Inotify2->watch
 sub watch {
@@ -22,11 +35,19 @@ sub watch {
 	my @st = stat($path) or return;
 	my $k = "$path\0$mask";
 	$self->{watch}->{$k} = $st[10]; # 10 - ctime
+	if ($mask & IN_DELETE) {
+		opendir(my $dh, $path) or return;
+		fill_dirlist($self, $path, $dh);
+	}
 	bless [ $self->{watch}, $k ], 'PublicInbox::FakeInotify::Watch';
 }
 
-sub on_new_files ($$$$) {
-	my ($events, $dh, $path, $old_ctime) = @_;
+# also used by KQNotify since it kevent requires readdir on st_nlink
+# count changes.
+sub on_dir_change ($$$$;$) {
+	my ($events, $dh, $path, $old_ctime, $dirlist) = @_;
+	my $oldlist = $dirlist->{$path};
+	my $newlist = $oldlist ? {} : undef;
 	while (defined(my $base = readdir($dh))) {
 		next if $base =~ /\A\.\.?\z/;
 		my $full = "$path/$base";
@@ -35,7 +56,19 @@ sub on_new_files ($$$$) {
 			push @$events,
 				bless(\$full, 'PublicInbox::FakeInotify::Event')
 		}
+		if (!@st) {
+			# ignore ENOENT due to race
+			warn "unhandled stat($full) error: $!\n" if !$!{ENOENT};
+		} elsif ($newlist) {
+			$newlist->{$base} = undef;
+		}
 	}
+	return if !$newlist;
+	delete @$oldlist{keys %$newlist};
+	$dirlist->{$path} = $newlist;
+	push(@$events, map {
+		bless \"$path/$_", 'PublicInbox::FakeInotify::GoneEvent'
+	} keys %$oldlist);
 }
 
 # behaves like non-blocking Linux::Inotify2->read
@@ -43,6 +76,7 @@ sub read {
 	my ($self) = @_;
 	my $watch = $self->{watch} or return ();
 	my $events = [];
+	my @watch_gone;
 	for my $x (keys %$watch) {
 		my ($path, $mask) = split(/\0/, $x, 2);
 		my @now = stat($path) or next;
@@ -52,14 +86,18 @@ sub read {
 		if ($mask & IN_MODIFY) {
 			push @$events,
 				bless(\$path, 'PublicInbox::FakeInotify::Event')
-		} elsif ($mask & MOVED_TO_OR_CREATE) {
-			opendir(my $dh, $path) or do {
+		} elsif ($mask & (MOVED_TO_OR_CREATE | IN_DELETE)) {
+			if (opendir(my $dh, $path)) {
+				on_dir_change($events, $dh, $path, $old_ctime,
+						$self->{dirlist});
+			} elsif ($!{ENOENT}) {
+				push @watch_gone, $x;
+			} else {
 				warn "W: opendir $path: $!\n";
-				next;
-			};
-			on_new_files($events, $dh, $path, $old_ctime);
+			}
 		}
 	}
+	delete @$watch{@watch_gone};
 	@$events;
 }
 
@@ -86,4 +124,13 @@ package PublicInbox::FakeInotify::Event;
 use strict;
 
 sub fullname { ${$_[0]} }
+
+sub IN_DELETE { 0 }
+
+package PublicInbox::FakeInotify::GoneEvent;
+use strict;
+our @ISA = qw(PublicInbox::FakeInotify::Event);
+
+sub IN_DELETE { 1 }
+
 1;
diff --git a/lib/PublicInbox/KQNotify.pm b/lib/PublicInbox/KQNotify.pm
index cfea6b1b..fc321a16 100644
--- a/lib/PublicInbox/KQNotify.pm
+++ b/lib/PublicInbox/KQNotify.pm
@@ -5,9 +5,10 @@
 # using IO::KQueue on *BSD systems.
 package PublicInbox::KQNotify;
 use strict;
+use v5.10.1;
 use IO::KQueue;
 use PublicInbox::DSKQXS; # wraps IO::KQueue for fork-safe DESTROY
-use PublicInbox::FakeInotify;
+use PublicInbox::FakeInotify qw(fill_dirlist on_dir_change);
 use Time::HiRes qw(stat);
 
 # NOTE_EXTEND detects rename(2), NOTE_WRITE detects link(2)
@@ -37,8 +38,9 @@ sub watch {
 		EV_ADD | EV_CLEAR, # flags
 		$mask, # fflags
 		0, 0); # data, udata
-	if ($mask == NOTE_WRITE || $mask == MOVED_TO_OR_CREATE) {
+	if ($mask & (MOVED_TO_OR_CREATE | NOTE_DELETE)) {
 		$self->{watch}->{$ident} = $watch;
+		fill_dirlist($self, $path, $fh) if $mask & NOTE_DELETE;
 	} else {
 		die "TODO Not implemented: $mask";
 	}
@@ -68,12 +70,12 @@ sub read {
 		if (!defined($old_ctime)) {
 			push @$events,
 				bless(\$path, 'PublicInbox::FakeInotify::Event')
-		} elsif ($mask & MOVED_TO_OR_CREATE) {
+		} elsif ($mask & (MOVED_TO_OR_CREATE | NOTE_DELETE)) {
 			my @new_st = stat($path) or next;
 			$self->{watch}->{$ident}->[3] = $new_st[10]; # ctime
 			rewinddir($dh);
-			PublicInbox::FakeInotify::on_new_files($events, $dh,
-							$path, $old_ctime);
+			on_dir_change($events, $dh, $path, $old_ctime,
+					$self->{dirlist});
 		}
 	}
 	@$events;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 7349c261..5f178418 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -28,7 +28,7 @@ use Time::HiRes qw(stat); # ctime comparisons for config cache
 use File::Path qw(mkpath);
 use File::Spec;
 our $quit = \&CORE::exit;
-our ($current_lei, $errors_log, $listener, $oldset);
+our ($current_lei, $errors_log, $listener, $oldset, $dir_idle);
 my ($recv_cmd, $send_cmd);
 my $GLP = Getopt::Long::Parser->new;
 $GLP->configure(qw(gnu_getopt no_ignore_case auto_abbrev));
@@ -539,6 +539,7 @@ sub _lei_atfork_child {
 	}
 	close $listener if $listener;
 	undef $listener;
+	undef $dir_idle;
 	%PATH2CFG = ();
 	undef $errors_log;
 	$quit = \&CORE::exit;
@@ -1114,8 +1115,8 @@ sub dump_and_clear_log {
 sub lazy_start {
 	my ($path, $errno, $narg) = @_;
 	local ($errors_log, $listener);
-	($errors_log) = ($path =~ m!\A(.+?/)[^/]+\z!);
-	$errors_log .= 'errors.log';
+	my ($sock_dir) = ($path =~ m!\A(.+?)/[^/]+\z!);
+	$errors_log = "$sock_dir/errors.log";
 	my $addr = pack_sockaddr_un($path);
 	my $lk = bless { lock_path => $errors_log }, 'PublicInbox::Lock';
 	$lk->lock_acquire;
@@ -1187,9 +1188,13 @@ sub lazy_start {
 	local @SIG{keys %$sig} = values(%$sig) unless $sigfd;
 	undef $sig;
 	local $SIG{PIPE} = 'IGNORE';
-	if ($sigfd) { # TODO: use inotify/kqueue to detect unlinked sockets
-		undef $sigfd;
-		PublicInbox::DS->SetLoopTimeout(5000);
+	require PublicInbox::DirIdle;
+	local $dir_idle = PublicInbox::DirIdle->new([$sock_dir], sub {
+		# just rely on wakeup ot hit PostLoopCallback set below
+		_dir_idle_handler(@_) if $_[0]->fullname ne $path;
+	}, 1);
+	if ($sigfd) {
+		undef $sigfd; # unref, already in DS::DescriptorMap
 	} else {
 		# wake up every second to accept signals if we don't
 		# have signalfd or IO::KQueue:
diff --git a/t/dir_idle.t b/t/dir_idle.t
index d62eb5a2..969c16e9 100644
--- a/t/dir_idle.t
+++ b/t/dir_idle.t
@@ -1,6 +1,22 @@
 #!perl -w
 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use Test::More;
+use v5.10.1; use strict; use PublicInbox::TestCommon;
+use PublicInbox::DS qw(now);
+use File::Path qw(make_path);
 use_ok 'PublicInbox::DirIdle';
+my ($tmpdir, $for_destroy) = tmpdir();
+make_path("$tmpdir/a/b");
+my @x;
+my $cb = sub { push @x, \@_ };
+my $di = PublicInbox::DirIdle->new(["$tmpdir/a"], $cb, 1);
+PublicInbox::DS->SetLoopTimeout(1000);
+my $end = 3 + now;
+PublicInbox::DS->SetPostLoopCallback(sub { scalar(@x) == 0 && now < $end });
+tick(0.011);
+rmdir("$tmpdir/a/b") or xbail "rmdir $!";
+PublicInbox::DS->EventLoop;
+is(scalar(@x), 1, 'got an event') and
+	is($x[0]->[0]->fullname, "$tmpdir/a/b", 'got expected fullname');
+PublicInbox::DS->Reset;
 done_testing;
diff --git a/t/fake_inotify.t b/t/fake_inotify.t
index 5c925ae6..734ddbfb 100644
--- a/t/fake_inotify.t
+++ b/t/fake_inotify.t
@@ -5,12 +5,12 @@
 # Ensure FakeInotify can pick up rename(2) and link(2) operations
 # used by Maildir writing tools
 use strict;
-use Test::More;
 use PublicInbox::TestCommon;
 use_ok 'PublicInbox::FakeInotify';
 my $MIN_FS_TICK = 0.011; # for low-res CONFIG_HZ=100 systems
 my ($tmpdir, $for_destroy) = tmpdir();
 mkdir "$tmpdir/new" or BAIL_OUT "mkdir: $!";
+mkdir "$tmpdir/new/rmd" or BAIL_OUT "mkdir: $!";
 open my $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
 close $fh or BAIL_OUT "close: $!";
 
@@ -18,12 +18,12 @@ my $fi = PublicInbox::FakeInotify->new;
 my $mask = PublicInbox::FakeInotify::MOVED_TO_OR_CREATE();
 my $w = $fi->watch("$tmpdir/new", $mask);
 
-select undef, undef, undef, $MIN_FS_TICK;
+tick $MIN_FS_TICK;
 rename("$tmpdir/tst", "$tmpdir/new/tst") or BAIL_OUT "rename: $!";
 my @events = map { $_->fullname } $fi->read;
 is_deeply(\@events, ["$tmpdir/new/tst"], 'rename(2) detected');
 
-select undef, undef, undef, $MIN_FS_TICK;
+tick $MIN_FS_TICK;
 open $fh, '>', "$tmpdir/tst" or BAIL_OUT "open: $!";
 close $fh or BAIL_OUT "close: $!";
 link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!";
@@ -31,10 +31,23 @@ link("$tmpdir/tst", "$tmpdir/new/link") or BAIL_OUT "link: $!";
 is_deeply(\@events, ["$tmpdir/new/link"], 'link(2) detected');
 
 $w->cancel;
-select undef, undef, undef, $MIN_FS_TICK;
+tick $MIN_FS_TICK;
 link("$tmpdir/new/tst", "$tmpdir/new/link2") or BAIL_OUT "link: $!";
 @events = map { $_->fullname } $fi->read;
 is_deeply(\@events, [], 'link(2) not detected after cancel');
+$fi->watch("$tmpdir/new", PublicInbox::FakeInotify::IN_DELETE());
+
+tick $MIN_FS_TICK;
+rmdir("$tmpdir/new/rmd") or xbail "rmdir: $!";
+@events = $fi->read;
+is_deeply([map{ $_->fullname }@events], ["$tmpdir/new/rmd"], 'rmdir detected');
+ok($events[0]->IN_DELETE, 'IN_DELETE set on rmdir');
+
+tick $MIN_FS_TICK;
+unlink("$tmpdir/new/tst") or xbail "unlink: $!";
+@events = grep { ref =~ /Gone/ } $fi->read;
+is_deeply([map{ $_->fullname }@events], ["$tmpdir/new/tst"], 'unlink detected');
+ok($events[0]->IN_DELETE, 'IN_DELETE set on unlink');
 
 PublicInbox::DS->Reset;
 

                 reply	other threads:[~2021-05-14  7:27 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210514072757.24883-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).