unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] introduce public-inbox-watch
@ 2016-06-17  0:41 Eric Wong
  2016-06-17  0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
  2016-06-17  0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
  0 siblings, 2 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17  0:41 UTC (permalink / raw)
  To: meta

This will allow easier mirroring for existing subscribers
of mailing lists, as -mda may reject messages which are
acceptable to the list being mirrored.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] filter: split out scrub method from delivery
  2016-06-17  0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
@ 2016-06-17  0:41 ` Eric Wong
  2016-06-17  0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
  1 sibling, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17  0:41 UTC (permalink / raw)
  To: meta

We will scrub messages when importing archives, so ensure
scrubbing is usable outside of the delivery routine.
---
 lib/PublicInbox/Filter/Base.pm | 9 ++++++++-
 lib/PublicInbox/Filter/Vger.pm | 7 ++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
index 0991e87..37f1ee7 100644
--- a/lib/PublicInbox/Filter/Base.pm
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -62,6 +62,13 @@ sub reject ($$) {
 
 sub err ($) { $_[0]->{err} }
 
+# by default, scrub is a no-op, see PublicInbox::Filter::Vger::scrub
+# for an example of the override
+sub scrub {
+	my ($self, $mime) = @_;
+	$self->ACCEPT($mime);
+}
+
 # for MDA
 sub delivery {
 	my ($self, $mime) = @_;
@@ -94,7 +101,7 @@ sub delivery {
 		push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
 	}
 
-	@r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+	@r ? $self->reject(join("\n", @r)) : $self->scrub($mime);
 }
 
 1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 9498081..2ffed18 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -17,7 +17,7 @@ my $l3 =
 # only LKML had this, and LKML nowadays has no list trailer since Jan 2016
 my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
 
-sub delivery {
+sub scrub {
 	my ($self, $mime) = @_;
 	my $s = $mime->as_string;
 
@@ -30,4 +30,9 @@ sub delivery {
 	$self->ACCEPT($mime);
 }
 
+sub delivery {
+	my ($self, $mime) = @_;
+	$self->scrub($mime);
+}
+
 1;

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] watch: introduce watch directive
  2016-06-17  0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
  2016-06-17  0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
@ 2016-06-17  0:41 ` Eric Wong
  2016-06-17  1:25   ` [PATCH 3/2] watch: quiet down rejected header matches Eric Wong
  1 sibling, 1 reply; 4+ messages in thread
From: Eric Wong @ 2016-06-17  0:41 UTC (permalink / raw)
  To: meta

This will allow users to run importers off existing mail
accounts where they may not have access to run -mda.
Currently, we only support Maildirs, but IMAP ought to be
doable.
---
 MANIFEST                        |   2 +
 lib/PublicInbox/Config.pm       |   3 +-
 lib/PublicInbox/WatchMaildir.pm | 141 ++++++++++++++++++++++++++++++++++++++++
 script/public-inbox-watch       |  16 +++++
 4 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 lib/PublicInbox/WatchMaildir.pm
 create mode 100755 script/public-inbox-watch

diff --git a/MANIFEST b/MANIFEST
index fdb92e0..9c8cc1c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -74,6 +74,7 @@ lib/PublicInbox/Thread.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/View.pm
 lib/PublicInbox/WWW.pm
+lib/PublicInbox/WatchMaildir.pm
 lib/PublicInbox/WwwAttach.pm
 sa_config/Makefile
 sa_config/README
@@ -85,6 +86,7 @@ script/public-inbox-init
 script/public-inbox-learn
 script/public-inbox-mda
 script/public-inbox-nntpd
+script/public-inbox-watch
 script/public-inbox.cgi
 scripts/dc-dlvr
 scripts/dc-dlvr.pre
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 4651861..43ffba7 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -120,7 +120,8 @@ sub _fill {
 	my ($self, $pfx) = @_;
 	my $rv = {};
 
-	foreach my $k (qw(mainrepo address filter url newsgroup)) {
+	foreach my $k (qw(mainrepo address filter url newsgroup
+			watch watchheader)) {
 		my $v = $self->{"$pfx.$k"};
 		$rv->{$k} = $v if defined $v;
 	}
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
new file mode 100644
index 0000000..b23556a
--- /dev/null
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -0,0 +1,141 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::WatchMaildir;
+use strict;
+use warnings;
+use Email::MIME;
+use Email::MIME::ContentType;
+$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
+use PublicInbox::Git;
+use PublicInbox::Import;
+use PublicInbox::MDA;
+
+sub new {
+	my ($class, $config) = @_;
+	my (%mdmap, @mdir);
+	foreach my $k (keys %$config) {
+		$k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
+		my $name = $1;
+		my $watch = $config->{$k};
+		if ($watch =~ s/\Amaildir://) {
+			$watch =~ s!/+\z!!;
+			my $inbox = $config->lookup_name($name);
+			if (my $wm = $inbox->{watchheader}) {
+				my ($k, $v) = split(/:/, $wm, 2);
+				$inbox->{-watchheader} = [ $k, qr/\Q$v\E/ ];
+			}
+			my $new = "$watch/new";
+			my $cur = "$watch/cur";
+			push @mdir, $new, $cur;
+			$mdmap{$new} = $inbox;
+			$mdmap{$cur} = $inbox;
+		} else {
+			warn "watch unsupported: $k=$watch\n";
+		}
+	}
+	return unless @mdir;
+
+	my $mdre = join('|', map { quotemeta($_) } @mdir);
+	$mdre = qr!\A($mdre)/!;
+	bless {
+		mdmap => \%mdmap,
+		mdir => \@mdir,
+		mdre => $mdre,
+		importers => {},
+	}, $class;
+}
+
+sub _try_fsn_paths {
+	my ($self, $paths) = @_;
+	_try_path($self, $_->{path}) foreach @$paths;
+	$_->done foreach values %{$self->{importers}};
+}
+
+sub _try_path {
+	my ($self, $path) = @_;
+	if ($path !~ $self->{mdre}) {
+		warn "unrecognized path: $path\n";
+		return;
+	}
+	my $inbox = $self->{mdmap}->{$1};
+	unless ($inbox) {
+		warn "unmappable dir: $1\n";
+		return;
+	}
+	my $im = $inbox->{-import} ||= eval {
+		my $git = $inbox->git;
+		my $name = $inbox->{name};
+		my $addr = $inbox->{-primary_address};
+		PublicInbox::Import->new($git, $name, $addr);
+	};
+	$self->{importers}->{"$im"} = $im;
+	my $mime;
+	if (open my $fh, '<', $path) {
+		local $/;
+		my $str = <$fh>;
+		$str or return;
+		$mime = Email::MIME->new(\$str);
+	} elsif ($!{ENOENT}) {
+		return;
+	} else {
+		warn "failed to open $path: $!\n";
+		return;
+	}
+
+	$mime->header_set($_) foreach @PublicInbox::MDA::BAD_HEADERS;
+	my $wm = $inbox->{-watchheader};
+	if ($wm) {
+		my $v = $mime->header_obj->header_raw($wm->[0]);
+		unless ($v && $v =~ $wm->[1]) {
+			warn "$wm->[0] failed to match $wm->[1]\n";
+			return;
+		}
+	}
+	my $f = $inbox->{filter};
+	if ($f && $f =~ /::/) {
+		eval "require $f";
+		if ($@) {
+			warn $@;
+		} else {
+			$f = $f->new;
+			$mime = $f->scrub($mime);
+		}
+	}
+	$mime or return;
+	my $mid = $mime->header_obj->header_raw('Message-Id');
+	$im->add($mime);
+}
+
+sub watch {
+	my ($self) = @_;
+	my $cb = sub { _try_fsn_paths($self, \@_) };
+	my $mdir = $self->{mdir};
+
+	require Filesys::Notify::Simple;
+	my $watcher = Filesys::Notify::Simple->new($mdir);
+	$watcher->wait($cb) while (1);
+}
+
+sub scan {
+	my ($self) = @_;
+	my $mdir = $self->{mdir};
+	foreach my $dir (@$mdir) {
+		my $ok = opendir(my $dh, $dir);
+		unless ($ok) {
+			warn "failed to open $dir: $!\n";
+			next;
+		}
+		while (my $fn = readdir($dh)) {
+			next unless $fn =~ /\A[a-zA-Z0-9][\w:,=\.]+\z/;
+			$fn = "$dir/$fn";
+			if (-f $fn) {
+				_try_path($self, $fn);
+			} else {
+				warn "not a file: $fn\n";
+			}
+		}
+		closedir $dh;
+	}
+}
+
+1;
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
new file mode 100755
index 0000000..42ae55a
--- /dev/null
+++ b/script/public-inbox-watch
@@ -0,0 +1,16 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use PublicInbox::WatchMaildir;
+use PublicInbox::Config;
+my $config = PublicInbox::Config->new;
+my $watch_md = PublicInbox::WatchMaildir->new($config);
+if ($watch_md) {
+	my $scan = sub { $watch_md->scan };
+	$SIG{USR1} = $scan;
+	$SIG{ALRM} = sub { $SIG{ALRM} = 'DEFAULT'; $scan->() };
+	alarm(1);
+	$watch_md->watch;
+}

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/2] watch: quiet down rejected header matches
  2016-06-17  0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
@ 2016-06-17  1:25   ` Eric Wong
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17  1:25 UTC (permalink / raw)
  To: meta

People may use this directive because they prefer to merge
several mailing lists into one local mailbox, so there may
be many messages which do not match the watched header, and
we should not needlessly clutter logs with warnings for them.
---
 lib/PublicInbox/WatchMaildir.pm | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b23556a..3536375 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -86,10 +86,7 @@ sub _try_path {
 	my $wm = $inbox->{-watchheader};
 	if ($wm) {
 		my $v = $mime->header_obj->header_raw($wm->[0]);
-		unless ($v && $v =~ $wm->[1]) {
-			warn "$wm->[0] failed to match $wm->[1]\n";
-			return;
-		}
+		return unless ($v && $v =~ $wm->[1]);
 	}
 	my $f = $inbox->{filter};
 	if ($f && $f =~ /::/) {

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-06-17  1:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-17  0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
2016-06-17  0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
2016-06-17  0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
2016-06-17  1:25   ` [PATCH 3/2] watch: quiet down rejected header matches Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).