unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/2] watch: introduce watch directive
Date: Fri, 17 Jun 2016 00:41:28 +0000	[thread overview]
Message-ID: <20160617004128.1037-3-e@80x24.org> (raw)
In-Reply-To: <20160617004128.1037-1-e@80x24.org>

This will allow users to run importers off existing mail
accounts where they may not have access to run -mda.
Currently, we only support Maildirs, but IMAP ought to be
doable.
---
 MANIFEST                        |   2 +
 lib/PublicInbox/Config.pm       |   3 +-
 lib/PublicInbox/WatchMaildir.pm | 141 ++++++++++++++++++++++++++++++++++++++++
 script/public-inbox-watch       |  16 +++++
 4 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 lib/PublicInbox/WatchMaildir.pm
 create mode 100755 script/public-inbox-watch

diff --git a/MANIFEST b/MANIFEST
index fdb92e0..9c8cc1c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -74,6 +74,7 @@ lib/PublicInbox/Thread.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/View.pm
 lib/PublicInbox/WWW.pm
+lib/PublicInbox/WatchMaildir.pm
 lib/PublicInbox/WwwAttach.pm
 sa_config/Makefile
 sa_config/README
@@ -85,6 +86,7 @@ script/public-inbox-init
 script/public-inbox-learn
 script/public-inbox-mda
 script/public-inbox-nntpd
+script/public-inbox-watch
 script/public-inbox.cgi
 scripts/dc-dlvr
 scripts/dc-dlvr.pre
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 4651861..43ffba7 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -120,7 +120,8 @@ sub _fill {
 	my ($self, $pfx) = @_;
 	my $rv = {};
 
-	foreach my $k (qw(mainrepo address filter url newsgroup)) {
+	foreach my $k (qw(mainrepo address filter url newsgroup
+			watch watchheader)) {
 		my $v = $self->{"$pfx.$k"};
 		$rv->{$k} = $v if defined $v;
 	}
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
new file mode 100644
index 0000000..b23556a
--- /dev/null
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -0,0 +1,184 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Imports mail from watched Maildirs into the inboxes which name them
+# in a "publicinbox.<name>.watch" config directive.
+package PublicInbox::WatchMaildir;
+use strict;
+use warnings;
+use Email::MIME;
+use Email::MIME::ContentType;
+$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
+use PublicInbox::Git;
+use PublicInbox::Import;
+use PublicInbox::MDA;
+
+# Builds a watcher from every "publicinbox.<name>.watch" key of $config.
+# Only "maildir:/path" values are supported, anything else is warned
+# about and skipped.  Returns nothing when no Maildir ends up being
+# watched, so callers must check the result before using it.
+sub new {
+	my ($class, $config) = @_;
+	my (%mdmap, @mdir);
+	foreach my $k (keys %$config) {
+		$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
+		my $name = $1;
+		my $watch = $config->{$k};
+		unless ($watch =~ s/\Amaildir://) {
+			warn "watch unsupported: $k=$watch\n";
+			next;
+		}
+		$watch =~ s!/+\z!!;
+		my $inbox = $config->lookup_name($name);
+		unless ($inbox) {
+			# dereferencing an undef $inbox below would
+			# autovivify a plain hash and map the Maildir to it
+			warn "watch ignored, inbox lookup failed: $k\n";
+			next;
+		}
+		if (my $wm = $inbox->{watchheader}) {
+			# "Header-Name:value", the value is matched literally
+			my ($hdr, $val) = split(/:/, $wm, 2);
+			$val = '' unless defined $val; # no ':' configured
+			$inbox->{-watchheader} = [ $hdr, qr/\Q$val\E/ ];
+		}
+		# both the new/ and cur/ subdirectories are watched
+		foreach my $dir ("$watch/new", "$watch/cur") {
+			push @mdir, $dir;
+			$mdmap{$dir} = $inbox;
+		}
+	}
+	return unless @mdir;
+
+	# a single alternation maps any path back to its watched directory
+	my $mdre = join('|', map { quotemeta($_) } @mdir);
+	$mdre = qr!\A($mdre)/!;
+	return bless {
+		mdmap => \%mdmap,
+		mdir => \@mdir,
+		mdre => $mdre,
+		importers => {},
+	}, $class;
+}
+
+# Calls ->done on every importer used so far.  This happens for all of
+# them after every batch, so ->done has to be safe to call repeatedly.
+sub _done_for_now {
+	my ($self) = @_;
+	$_->done foreach values %{$self->{importers}};
+}
+
+# Body of the Filesys::Notify::Simple callback: $paths is an array ref
+# of events, the affected file name is taken from each event's {path}.
+sub _try_fsn_paths {
+	my ($self, $paths) = @_;
+	_try_path($self, $_->{path}) foreach @$paths;
+	_done_for_now($self);
+}
+
+# Imports the message stored at $path into the inbox its Maildir is
+# mapped to.  Problems are reported via warn and the message is skipped;
+# files which are already gone when opened are skipped silently.
+sub _try_path {
+	my ($self, $path) = @_;
+	if ($path !~ $self->{mdre}) {
+		warn "unrecognized path: $path\n";
+		return;
+	}
+	my $dir = $1;
+	my $inbox = $self->{mdmap}->{$dir};
+	unless ($inbox) {
+		warn "unmappable dir: $dir\n";
+		return;
+	}
+	# the importer is created on first use and cached on the inbox
+	my $im = $inbox->{-import} ||= eval {
+		my $git = $inbox->git;
+		my $name = $inbox->{name};
+		my $addr = $inbox->{-primary_address};
+		PublicInbox::Import->new($git, $name, $addr);
+	};
+	unless ($im) {
+		# never register an undef importer: calling ->add or
+		# ->done on it would take the whole watcher down
+		my $err = $@ || "unknown error\n";
+		warn "failed to create importer for $path: $err";
+		return;
+	}
+	$self->{importers}->{"$im"} = $im;
+	my $mime;
+	if (open my $fh, '<', $path) {
+		local $/;
+		my $str = <$fh>;
+		$str or return;
+		$mime = Email::MIME->new(\$str);
+	} elsif ($!{ENOENT}) {
+		return; # gone before we could open it
+	} else {
+		warn "failed to open $path: $!\n";
+		return;
+	}
+
+	# header_set without any value removes the header
+	$mime->header_set($_) foreach @PublicInbox::MDA::BAD_HEADERS;
+	if (my $wm = $inbox->{-watchheader}) {
+		my $v = $mime->header_obj->header_raw($wm->[0]);
+		unless ($v && $v =~ $wm->[1]) {
+			warn "$wm->[0] failed to match $wm->[1]\n";
+			return;
+		}
+	}
+	my $f = $inbox->{filter};
+	if ($f && $f =~ /::/) {
+		# require by file name rather than via string eval, so
+		# the configured value is never compiled as Perl code
+		(my $file = "$f.pm") =~ s!::!/!g;
+		if (eval { require $file; 1 }) {
+			$mime = $f->new->scrub($mime);
+		} else {
+			warn $@;
+		}
+	}
+	$mime or return;
+	$im->add($mime);
+}
+
+# Blocks forever, importing messages as filesystem events come in for
+# the watched directories.
+sub watch {
+	my ($self) = @_;
+	my $cb = sub { _try_fsn_paths($self, \@_) };
+
+	# required here and not at the top, only watch() depends on it
+	require Filesys::Notify::Simple;
+	my $watcher = Filesys::Notify::Simple->new($self->{mdir});
+	$watcher->wait($cb) while (1);
+}
+
+# Walks every watched directory once and imports each regular file
+# whose name passes the check below, then calls ->done on the importers
+# like _try_fsn_paths does after each batch of events.
+sub scan {
+	my ($self) = @_;
+	foreach my $dir (@{$self->{mdir}}) {
+		my $dh;
+		unless (opendir($dh, $dir)) {
+			warn "failed to open $dir: $!\n";
+			next;
+		}
+		while (defined(my $fn = readdir($dh))) {
+			# must start alphanumeric: skips ".", ".." and dotfiles
+			next unless $fn =~ /\A[a-zA-Z0-9][\w:,=\.]+\z/;
+			$fn = "$dir/$fn";
+			if (-f $fn) {
+				_try_path($self, $fn);
+			} else {
+				warn "not a file: $fn\n";
+			}
+		}
+		closedir $dh;
+	}
+	_done_for_now($self);
+}
+
+1;
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
new file mode 100755
index 0000000..42ae55a
--- /dev/null
+++ b/script/public-inbox-watch
@@ -0,0 +1,26 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# Watches the Maildirs named by "publicinbox.<name>.watch" directives
+# and imports the mail found there.  Exits right away when the config
+# yields no watcher.
+use strict;
+use warnings;
+use PublicInbox::WatchMaildir;
+use PublicInbox::Config;
+
+my $watcher = PublicInbox::WatchMaildir->new(PublicInbox::Config->new);
+exit unless $watcher;
+
+# SIGUSR1 triggers a full rescan of the watched directories on demand
+$SIG{USR1} = sub { $watcher->scan };
+
+# one-shot alarm for the initial scan; the handler disarms itself first
+# NOTE(review): presumably delayed so watch() is already running - confirm
+$SIG{ALRM} = sub {
+	$SIG{ALRM} = 'DEFAULT';
+	$watcher->scan;
+};
+alarm(1);
+$watcher->watch;

  parent reply	other threads:[~2016-06-17  0:41 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-17  0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
2016-06-17  0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
2016-06-17  0:41 ` Eric Wong [this message]
2016-06-17  1:25   ` [PATCH 3/2] watch: quiet down rejected header matches Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160617004128.1037-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).