unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei: MH: support inotify to detect updates
Date: Wed,  3 Jan 2024 10:23:15 +0000	[thread overview]
Message-ID: <20240103102315.2119260-1-e@80x24.org> (raw)

This should help us deal with MH sequence number packing and
invalidating mail_sync.sqlite3.
---
 lib/PublicInbox/LEI.pm          | 133 +++++++++++++++++---------------
 lib/PublicInbox/LeiMailSync.pm  |  10 ++-
 lib/PublicInbox/LeiNoteEvent.pm |  22 +++++-
 lib/PublicInbox/LeiWatch.pm     |   7 +-
 lib/PublicInbox/MHreader.pm     |   2 +-
 t/lei-watch.t                   |  12 ++-
 6 files changed, 112 insertions(+), 74 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e0cfd55a..81f940fe 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -28,7 +28,7 @@ use PublicInbox::IPC;
 use Time::HiRes qw(stat); # ctime comparisons for config cache
 use File::Path ();
 use File::Spec;
-use Carp ();
+use Carp qw(carp);
 use Sys::Syslog qw(openlog syslog closelog);
 our $quit = \&CORE::exit;
 our ($current_lei, $errors_log, $listener, $oldset, $dir_idle);
@@ -38,7 +38,7 @@ my $GLP_PASS = Getopt::Long::Parser->new;
 $GLP_PASS->configure(qw(gnu_getopt no_ignore_case auto_abbrev pass_through));
 
 our (%PATH2CFG, # persistent for socket daemon
-$MDIR2CFGPATH, # /path/to/maildir => { /path/to/config => [ ino watches ] }
+$MDIR2CFGPATH, # location => { /path/to/config => [ ino watches ] }
 $OPT, # shared between optparse and opt_dash callback (for Getopt::Long)
 $daemon_pid
 );
@@ -606,7 +606,7 @@ sub _lei_atfork_child {
 	$dir_idle->force_close if $dir_idle;
 	undef $dir_idle;
 	%PATH2CFG = ();
-	$MDIR2CFGPATH = {};
+	$MDIR2CFGPATH = undef;
 	eval 'no warnings; undef $PublicInbox::LeiNoteEvent::to_flush';
 	undef $errors_log;
 	$quit = \&CORE::exit;
@@ -1252,32 +1252,43 @@ sub cfg2lei ($) {
 	$lei;
 }
 
+sub note_event ($@) { # runs lei_note_event (via dispatch) for a given config file
+	my ($cfg_f, @args) = @_; # $cfg_f: config file path, used as a %PATH2CFG key
+	my $cfg = $PATH2CFG{$cfg_f} // return; # not in %PATH2CFG: nothing to do
+	eval { cfg2lei($cfg)->dispatch('note-event', @args) };
+	carp "E: note-event $cfg_f: $@\n" if $@; # warn instead of propagating the error
+}
+
 sub dir_idle_handler ($) { # PublicInbox::DirIdle callback
 	my ($ev) = @_; # Linux::Inotify2::Event or duck type
 	my $fn = $ev->fullname;
 	if ($fn =~ m!\A(.+)/(new|cur)/([^/]+)\z!) { # Maildir file
-		my ($mdir, $nc, $bn) = ($1, $2, $3);
-		$nc = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM;
-		for my $f (keys %{$MDIR2CFGPATH->{$mdir} // {}}) {
-			my $cfg = $PATH2CFG{$f} // next;
-			eval {
-				my $lei = cfg2lei($cfg);
-				$lei->dispatch('note-event',
-						"maildir:$mdir", $nc, $bn, $fn);
-			};
-			warn "E: note-event $f: $@\n" if $@;
+		my ($loc, $new_cur, $bn) = ("maildir:$1", $2, $3); # $MDIR2CFGPATH keys carry the "maildir:" prefix
+		$new_cur = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM; # '' marks a file which went away
+		for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+			note_event($cfg_f, $loc, $new_cur, $bn, $fn);
 		}
-	}
+	} elsif ($fn =~ m!\A(.+)/([0-9]+)\z!) { # MH mail message file
+		my ($loc, $n, $new_cur) = ("mh:$1", $2, '+'); # no new/cur in MH; '+' is a non-empty placeholder
+		$new_cur = '' if $ev->IN_DELETE || $ev->IN_MOVED_FROM; # '' marks a file which went away
+		for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+			note_event($cfg_f, $loc, $new_cur, $n, $fn);
+		}
+	} elsif ($fn =~ m!\A(.+)/\.mh_sequences\z!) { # reread flags
+		my $loc = "mh:$1";
+		for my $cfg_f (keys %{$MDIR2CFGPATH->{$loc} // {}}) {
+			note_event($cfg_f, $loc, '.mh_sequences') # NOTE(review): no $bn/$fn passed here; confirm lei_note_event copes with undef $bn
+		}
+	} # else we don't care
 	if ($ev->can('cancel') && ($ev->IN_IGNORE || $ev->IN_UNMOUNT)) {
 		$ev->cancel;
 	}
 	if ($fn =~ m!\A(.+)/(?:new|cur)\z! && !-e $fn) {
-		delete $MDIR2CFGPATH->{$1};
+		delete $MDIR2CFGPATH->{"maildir:$1"};
 	}
-	if (!-e $fn) { # config file or Maildir gone
-		for my $cfgpaths (values %$MDIR2CFGPATH) {
-			delete $cfgpaths->{$fn};
-		}
+	if (!-e $fn) { # config file, Maildir, or MH dir gone
+		delete $_->{$fn} for values %$MDIR2CFGPATH; # config file
+		delete @$MDIR2CFGPATH{"maildir:$fn", "mh:$fn"}; # Maildir or MH dir
 		delete $PATH2CFG{$fn};
 	}
 }
@@ -1442,19 +1453,22 @@ sub watch_state_ok ($) {
 	$state =~ /\Apause|(?:import|index|tag)-(?:ro|rw)\z/;
 }
 
-sub cancel_maildir_watch ($$) {
-	my ($d, $cfg_f) = @_;
-	my $w = delete $MDIR2CFGPATH->{$d}->{$cfg_f};
-	scalar(keys %{$MDIR2CFGPATH->{$d}}) or
-		delete $MDIR2CFGPATH->{$d};
-	for my $x (@{$w // []}) { $x->cancel }
+sub cancel_dir_watch ($$$) { # stop watching directory $d on behalf of config file $cfg_f
+	my ($type, $d, $cfg_f) = @_; # $type: location prefix, e.g. 'maildir' or 'mh'
+	my $loc = "$type:".canonpath_harder($d);
+	my $w = delete $MDIR2CFGPATH->{$loc}->{$cfg_f}; # watch list for this config, if any
+	delete $MDIR2CFGPATH->{$loc} if !(keys %{$MDIR2CFGPATH->{$loc}}); # drop location once no config uses it
+	$_->cancel for @$w;
 }
 
-sub add_maildir_watch ($$) {
-	my ($d, $cfg_f) = @_;
-	if (!exists($MDIR2CFGPATH->{$d}->{$cfg_f})) {
-		my @w = $dir_idle->add_watches(["$d/cur", "$d/new"], 1);
-		push @{$MDIR2CFGPATH->{$d}->{$cfg_f}}, @w if @w;
+sub add_dir_watch ($$$) { # start watching directory $d on behalf of config file $cfg_f
+	my ($type, $d, $cfg_f) = @_; # NOTE(review): callers match $type with /i, but it is compared to 'mh' case-sensitively below; confirm it is always lowercase
+	$d = canonpath_harder($d);
+	my $loc = "$type:$d";
+	my @dirs = $type eq 'mh' ? ($d) : ("$d/cur", "$d/new"); # MH: the directory itself; otherwise Maildir cur/ + new/
+	if (!exists($MDIR2CFGPATH->{$loc}->{$cfg_f})) { # skip if this config already has watches for $loc
+		my @w = $dir_idle->add_watches(\@dirs, 1);
+		push @{$MDIR2CFGPATH->{$loc}->{$cfg_f}}, @w if @w;
 	}
 }
 
@@ -1467,24 +1481,20 @@ sub refresh_watches {
 	my %seen;
 	my $cfg_f = $cfg->{'-f'};
 	for my $w (grep(/\Awatch\..+\.state\z/, keys %$cfg)) {
-		my $url = substr($w, length('watch.'), -length('.state'));
+		my $loc = substr($w, length('watch.'), -length('.state'));
 		require PublicInbox::LeiWatch;
-		$watches->{$url} //= PublicInbox::LeiWatch->new($url);
-		$seen{$url} = undef;
-		my $state = $cfg->get_1("watch.$url.state");
+		$watches->{$loc} //= PublicInbox::LeiWatch->new($loc);
+		$seen{$loc} = undef;
+		my $state = $cfg->get_1("watch.$loc.state");
 		if (!watch_state_ok($state)) {
-			warn("watch.$url.state=$state not supported\n");
-			next;
-		}
-		if ($url =~ /\Amaildir:(.+)/i) {
-			my $d = canonpath_harder($1);
-			if ($state eq 'pause') {
-				cancel_maildir_watch($d, $cfg_f);
-			} else {
-				add_maildir_watch($d, $cfg_f);
-			}
+			warn("watch.$loc.state=$state not supported\n");
+		} elsif ($loc =~ /\A(maildir|mh):(.+)\z/i) {
+			my ($type, $d) = ($1, $2);
+			$state eq 'pause' ?
+				cancel_dir_watch($type, $d, $cfg_f) :
+				add_dir_watch($type, $d, $cfg_f);
 		} else { # TODO: imap/nntp/jmap
-			$lei->child_error(0, "E: watch $url not supported, yet")
+			$lei->child_error(0, "E: watch $loc not supported, yet")
 		}
 	}
 
@@ -1492,29 +1502,28 @@ sub refresh_watches {
 	my $lms = $lei->lms;
 	if ($lms) {
 		$lms->lms_write_prepare;
-		for my $d ($lms->folders('maildir:')) {
-			substr($d, 0, length('maildir:')) = '';
-
+		for my $loc ($lms->folders(qr/\A(?:maildir|mh):/)) {
+			my $old = $loc;
+			my ($type, $d) = split /:/, $loc, 2;
 			# fixup old bugs while we're iterating:
-			my $cd = canonpath_harder($d);
-			my $f = "maildir:$cd";
-			$lms->rename_folder("maildir:$d", $f) if $d ne $cd;
-			next if $watches->{$f}; # may be set to pause
+			$d = canonpath_harder($d);
+			$loc = "$type:$d";
+			$lms->rename_folder($old, $loc) if $old ne $loc;
+			next if $watches->{$loc}; # may be set to pause
 			require PublicInbox::LeiWatch;
-			$watches->{$f} = PublicInbox::LeiWatch->new($f);
-			$seen{$f} = undef;
-			add_maildir_watch($cd, $cfg_f);
+			$watches->{$loc} = PublicInbox::LeiWatch->new($loc);
+			$seen{$loc} = undef;
+			add_dir_watch($type, $d, $cfg_f);
 		}
 	}
 	if ($old) { # cull old non-existent entries
-		for my $url (keys %$old) {
-			next if exists $seen{$url};
-			delete $old->{$url};
-			if ($url =~ /\Amaildir:(.+)/i) {
-				my $d = canonpath_harder($1);
-				cancel_maildir_watch($d, $cfg_f);
+		for my $loc (keys %$old) {
+			next if exists $seen{$loc};
+			delete $old->{$loc};
+			if ($loc =~ /\A(maildir|mh):(.+)\z/i) {
+				cancel_dir_watch($1, $2, $cfg_f);
 			} else { # TODO: imap/nntp/jmap
-				$lei->child_error(0, "E: watch $url TODO");
+				$lei->child_error(0, "E: watch $loc TODO");
 			}
 		}
 	}
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 593715dc..c498421c 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -425,9 +425,13 @@ sub folders {
 	my $re;
 	if (defined($pfx[0])) {
 		$sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
-		$re = !!$pfx[1] ? '.*' : '';
-		$re .= quotemeta($pfx[0]);
-		$re .= '.*';
+		if (ref($pfx[0])) { # assume qr// "Regexp"
+			$re = $pfx[0];
+		} else {
+			$re = !!$pfx[1] ? '.*' : '';
+			$re .= quotemeta($pfx[0]);
+			$re .= '.*';
+		}
 	}
 	my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
 	$sth->bind_param(1, $re) if defined($re);
diff --git a/lib/PublicInbox/LeiNoteEvent.pm b/lib/PublicInbox/LeiNoteEvent.pm
index 8581bd9a..8d900d0c 100644
--- a/lib/PublicInbox/LeiNoteEvent.pm
+++ b/lib/PublicInbox/LeiNoteEvent.pm
@@ -60,6 +60,18 @@ sub maildir_event { # via wq_nonblock_do
 	} # else: eml_from_path already warns
 }
 
+sub _mh_cb { # mh_read_one cb
+	my ($dir, $bn, $kw, $eml, $self, $state) = @_; # TODO: stub; args are unpacked but nothing is done with them yet
+}
+
+sub mh_event { # via wq_nonblock_do
+	my ($self, $folder, $bn, $state) = @_; # $folder is expected to start with "mh:"
+	my $dir = substr($folder, 3); # drop the 3-character "mh:" prefix
+	require PublicInbox::MHreader; # if we forked early
+	my $mhr = PublicInbox::MHreader->new($dir, $self->{lei}->{3});
+	$mhr->mh_read_one($bn, \&_mh_cb, $self, $state); # NOTE(review): _mh_cb is currently an empty stub
+}
+
 sub lei_note_event {
 	my ($lei, $folder, $new_cur, $bn, $fn, @rest) = @_;
 	die "BUG: unexpected: @rest" if @rest;
@@ -72,11 +84,14 @@ sub lei_note_event {
 	$lms->arg2folder($lei, [ $folder ]);
 	my $state = $cfg->get_1("watch.$folder.state") // 'tag-rw';
 	return if $state eq 'pause';
-	return $lms->clear_src($folder, \$bn) if $new_cur eq '';
+	if ($new_cur eq '') {
+		my $id = $folder =~ /\Amaildir:/ ? \$bn : $bn + 0;
+		return $lms->clear_src($folder, $id);
+	}
 	$lms->lms_pause;
 	$lei->ale; # prepare
 	$sto->write_prepare($lei);
-	require PublicInbox::MdirReader;
+	require PublicInbox::MHreader if $folder =~ /\Amh:/; # optimistic
 	my $self = $cfg->{-lei_note_event} //= do {
 		my $wq = bless { lms => $lms }, __PACKAGE__;
 		# MUAs such as mutt can trigger massive rename() storms so
@@ -91,12 +106,15 @@ sub lei_note_event {
 		$lei->{lne} = $wq;
 	};
 	if ($folder =~ /\Amaildir:/i) {
+		require PublicInbox::MdirReader;
 		my $fl = PublicInbox::MdirReader::maildir_basename_flags($bn)
 			// return;
 		return if index($fl, 'T') >= 0;
 		my $kw = PublicInbox::MdirReader::flags2kw($fl);
 		my $vmd = { kw => $kw, sync_info => [ $folder, \$bn ] };
 		$self->wq_nonblock_do('maildir_event', $fn, $vmd, $state);
+	} elsif ($folder =~ /\Amh:/) {
+		$self->wq_nonblock_do('mh_event', $folder, $bn, $state);
 	} # else: TODO: imap
 }
 
diff --git a/lib/PublicInbox/LeiWatch.pm b/lib/PublicInbox/LeiWatch.pm
index 35267b58..b30e5152 100644
--- a/lib/PublicInbox/LeiWatch.pm
+++ b/lib/PublicInbox/LeiWatch.pm
@@ -1,13 +1,12 @@
 # Copyright all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# represents a Maildir or IMAP "watch" item
+# represents a Maildir, MH or IMAP "watch" item
 package PublicInbox::LeiWatch;
-use strict;
-use v5.10.1;
+use v5.12;
 use parent qw(PublicInbox::IPC);
 
-# "url" may be something like "maildir:/path/to/dir"
+# "url" may be something like "maildir:/path/to/dir" or "mh:/path/to/dir"
 sub new { bless { url => $_[1] }, $_[0] }
 
 1;
diff --git a/lib/PublicInbox/MHreader.pm b/lib/PublicInbox/MHreader.pm
index 673e3e06..033aa740 100644
--- a/lib/PublicInbox/MHreader.pm
+++ b/lib/PublicInbox/MHreader.pm
@@ -82,7 +82,7 @@ sub kw_for ($$) {
 	\@kw;
 }
 
-sub _file2eml { # mh_each_file cb
+sub _file2eml { # mh_each_file / mh_read_one cb
 	my ($dir, $n, $self, $ucb, @arg) = @_;
 	my $eml = eml_from_path($n);
 	$ucb->($dir, $n, kw_for($self, $n), $eml, @arg) if $eml;
diff --git a/t/lei-watch.t b/t/lei-watch.t
index 7b357ee0..8ad50d13 100644
--- a/t/lei-watch.t
+++ b/t/lei-watch.t
@@ -3,6 +3,7 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict; use v5.10.1; use PublicInbox::TestCommon;
 use File::Path qw(make_path remove_tree);
+use PublicInbox::IO qw(write_file);
 plan skip_all => "TEST_FLAKY not enabled for $0" if !$ENV{TEST_FLAKY};
 require_mods('lei');
 my $have_fast_inotify = eval { require PublicInbox::Inotify } ||
@@ -13,7 +14,7 @@ $have_fast_inotify or
 
 my ($ro_home, $cfg_path) = setup_public_inboxes;
 test_lei(sub {
-	my $md = "$ENV{HOME}/md";
+	my ($md, $mh1, $mh2) = map { "$ENV{HOME}/$_" } qw(md mh1 mh2);
 	my $cfg_f = "$ENV{HOME}/.config/lei/config";
 	my $md2 = $md.'2';
 	lei_ok 'ls-watch';
@@ -45,13 +46,14 @@ test_lei(sub {
 	}
 
 	# first, make sure tag-ro works
-	make_path("$md/new", "$md/cur", "$md/tmp");
+	make_path("$md/new", "$md/cur", "$md/tmp", $mh1, $mh2);
 	lei_ok qw(add-watch --state=tag-ro), $md;
 	lei_ok 'ls-watch';
 	like($lei_out, qr/^\Qmaildir:$md\E$/sm, 'maildir shown');
 	lei_ok qw(q mid:testmessage@example.com -o), $md, '-I', "$ro_home/t1";
 	my @f = glob("$md/cur/*:2,");
 	is(scalar(@f), 1, 'got populated maildir with one result');
+
 	rename($f[0], "$f[0]S") or xbail "rename $!"; # set (S)een
 	tick($have_fast_inotify ? 0.2 : 2.2); # always needed for 1 CPU systems
 	lei_ok qw(note-event done); # flushes immediately (instead of 5s)
@@ -94,6 +96,12 @@ test_lei(sub {
 		my $cmp = [ <$fh> ];
 		is_xdeeply($cmp, $ino_contents, 'inotify Maildir watches gone');
 	};
+
+	write_file '>', "$mh1/.mh_sequences";
+	lei_ok qw(add-watch --state=tag-ro), $mh1, "mh:$mh2";
+	lei_ok 'ls-watch', \'refresh watches';
+	like $lei_out, qr/^\Qmh:$mh1\E$/sm, 'MH 1 shown';
+	like $lei_out, qr/^\Qmh:$mh2\E$/sm, 'MH 2 shown';
 });
 
 done_testing;

                 reply	other threads:[~2024-01-03 10:23 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240103102315.2119260-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).