* [PATCH 0/2] introduce public-inbox-watch
@ 2016-06-17 0:41 Eric Wong
2016-06-17 0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
2016-06-17 0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
0 siblings, 2 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17 0:41 UTC (permalink / raw)
To: meta
This will allow easier mirroring for existing subscribers
of mailing lists, as -mda may reject messages which may
be acceptable to the list being mirrored.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] filter: split out scrub method from delivery
2016-06-17 0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
@ 2016-06-17 0:41 ` Eric Wong
2016-06-17 0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
1 sibling, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17 0:41 UTC (permalink / raw)
To: meta
We will scrub for importing archives, so ensure it is usable
outside of the delivery routine.
---
lib/PublicInbox/Filter/Base.pm | 9 ++++++++-
lib/PublicInbox/Filter/Vger.pm | 7 ++++++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/Filter/Base.pm b/lib/PublicInbox/Filter/Base.pm
index 0991e87..37f1ee7 100644
--- a/lib/PublicInbox/Filter/Base.pm
+++ b/lib/PublicInbox/Filter/Base.pm
@@ -62,6 +62,13 @@ sub reject ($$) {
sub err ($) { $_[0]->{err} }
+# by default, scrub is a no-op, see PublicInbox::Filter::Vger::scrub
+# for an example of the override
+sub scrub {
+ my ($self, $mime) = @_;
+ $self->ACCEPT($mime);
+}
+
# for MDA
sub delivery {
my ($self, $mime) = @_;
@@ -94,7 +101,7 @@ sub delivery {
push @r, 'Rejected suffixes(s): '.join(', ', sort keys %sfx);
}
- @r ? $self->reject(join("\n", @r)) : $self->ACCEPT;
+ @r ? $self->reject(join("\n", @r)) : $self->scrub($mime);
}
1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 9498081..2ffed18 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -17,7 +17,7 @@ my $l3 =
# only LKML had this, and LKML nowadays has no list trailer since Jan 2016
my $l4 = qr!Please read the FAQ at +http://www\.tux\.org/lkml/!;
-sub delivery {
+sub scrub {
my ($self, $mime) = @_;
my $s = $mime->as_string;
@@ -30,4 +30,9 @@ sub delivery {
$self->ACCEPT($mime);
}
+sub delivery {
+ my ($self, $mime) = @_;
+ $self->scrub($mime);
+}
+
1;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] watch: introduce watch directive
2016-06-17 0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
2016-06-17 0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
@ 2016-06-17 0:41 ` Eric Wong
2016-06-17 1:25 ` [PATCH 3/2] watch: quiet down rejected header matches Eric Wong
1 sibling, 1 reply; 4+ messages in thread
From: Eric Wong @ 2016-06-17 0:41 UTC (permalink / raw)
To: meta
This will allow users to run importers off existing mail
accounts where they may not have access to run -mda.
Currently, we only support Maildirs, but IMAP ought to be
doable.
---
MANIFEST | 2 +
lib/PublicInbox/Config.pm | 3 +-
lib/PublicInbox/WatchMaildir.pm | 141 ++++++++++++++++++++++++++++++++++++++++
script/public-inbox-watch | 16 +++++
4 files changed, 161 insertions(+), 1 deletion(-)
create mode 100644 lib/PublicInbox/WatchMaildir.pm
create mode 100755 script/public-inbox-watch
diff --git a/MANIFEST b/MANIFEST
index fdb92e0..9c8cc1c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -74,6 +74,7 @@ lib/PublicInbox/Thread.pm
lib/PublicInbox/Unsubscribe.pm
lib/PublicInbox/View.pm
lib/PublicInbox/WWW.pm
+lib/PublicInbox/WatchMaildir.pm
lib/PublicInbox/WwwAttach.pm
sa_config/Makefile
sa_config/README
@@ -85,6 +86,7 @@ script/public-inbox-init
script/public-inbox-learn
script/public-inbox-mda
script/public-inbox-nntpd
+script/public-inbox-watch
script/public-inbox.cgi
scripts/dc-dlvr
scripts/dc-dlvr.pre
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 4651861..43ffba7 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -120,7 +120,8 @@ sub _fill {
my ($self, $pfx) = @_;
my $rv = {};
- foreach my $k (qw(mainrepo address filter url newsgroup)) {
+ foreach my $k (qw(mainrepo address filter url newsgroup
+ watch watchheader)) {
my $v = $self->{"$pfx.$k"};
$rv->{$k} = $v if defined $v;
}
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
new file mode 100644
index 0000000..b23556a
--- /dev/null
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -0,0 +1,141 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::WatchMaildir;
+use strict;
+use warnings;
+use Email::MIME;
+use Email::MIME::ContentType;
+$Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
+use PublicInbox::Git;
+use PublicInbox::Import;
+use PublicInbox::MDA;
+
+sub new {
+ my ($class, $config) = @_;
+ my (%mdmap, @mdir);
+ foreach my $k (keys %$config) {
+ $k =~ /\Apublicinbox\.([^\.]+)\.watch\z/ or next;
+ my $name = $1;
+ my $watch = $config->{$k};
+ if ($watch =~ s/\Amaildir://) {
+ $watch =~ s!/+\z!!;
+ my $inbox = $config->lookup_name($name);
+ if (my $wm = $inbox->{watchheader}) {
+ my ($k, $v) = split(/:/, $wm, 2);
+ $inbox->{-watchheader} = [ $k, qr/\Q$v\E/ ];
+ }
+ my $new = "$watch/new";
+ my $cur = "$watch/cur";
+ push @mdir, $new, $cur;
+ $mdmap{$new} = $inbox;
+ $mdmap{$cur} = $inbox;
+ } else {
+ warn "watch unsupported: $k=$watch\n";
+ }
+ }
+ return unless @mdir;
+
+ my $mdre = join('|', map { quotemeta($_) } @mdir);
+ $mdre = qr!\A($mdre)/!;
+ bless {
+ mdmap => \%mdmap,
+ mdir => \@mdir,
+ mdre => $mdre,
+ importers => {},
+ }, $class;
+}
+
+sub _try_fsn_paths {
+ my ($self, $paths) = @_;
+ _try_path($self, $_->{path}) foreach @$paths;
+ $_->done foreach values %{$self->{importers}};
+}
+
+sub _try_path {
+ my ($self, $path) = @_;
+ if ($path !~ $self->{mdre}) {
+ warn "unrecognized path: $path\n";
+ return;
+ }
+ my $inbox = $self->{mdmap}->{$1};
+ unless ($inbox) {
+ warn "unmappable dir: $1\n";
+ return;
+ }
+ my $im = $inbox->{-import} ||= eval {
+ my $git = $inbox->git;
+ my $name = $inbox->{name};
+ my $addr = $inbox->{-primary_address};
+ PublicInbox::Import->new($git, $name, $addr);
+ };
+ $self->{importers}->{"$im"} = $im;
+ my $mime;
+ if (open my $fh, '<', $path) {
+ local $/;
+ my $str = <$fh>;
+ $str or return;
+ $mime = Email::MIME->new(\$str);
+ } elsif ($!{ENOENT}) {
+ return;
+ } else {
+ warn "failed to open $path: $!\n";
+ return;
+ }
+
+ $mime->header_set($_) foreach @PublicInbox::MDA::BAD_HEADERS;
+ my $wm = $inbox->{-watchheader};
+ if ($wm) {
+ my $v = $mime->header_obj->header_raw($wm->[0]);
+ unless ($v && $v =~ $wm->[1]) {
+ warn "$wm->[0] failed to match $wm->[1]\n";
+ return;
+ }
+ }
+ my $f = $inbox->{filter};
+ if ($f && $f =~ /::/) {
+ eval "require $f";
+ if ($@) {
+ warn $@;
+ } else {
+ $f = $f->new;
+ $mime = $f->scrub($mime);
+ }
+ }
+ $mime or return;
+ my $mid = $mime->header_obj->header_raw('Message-Id');
+ $im->add($mime);
+}
+
+sub watch {
+ my ($self) = @_;
+ my $cb = sub { _try_fsn_paths($self, \@_) };
+ my $mdir = $self->{mdir};
+
+ require Filesys::Notify::Simple;
+ my $watcher = Filesys::Notify::Simple->new($mdir);
+ $watcher->wait($cb) while (1);
+}
+
+sub scan {
+ my ($self) = @_;
+ my $mdir = $self->{mdir};
+ foreach my $dir (@$mdir) {
+ my $ok = opendir(my $dh, $dir);
+ unless ($ok) {
+ warn "failed to open $dir: $!\n";
+ next;
+ }
+ while (my $fn = readdir($dh)) {
+ next unless $fn =~ /\A[a-zA-Z0-9][\w:,=\.]+\z/;
+ $fn = "$dir/$fn";
+ if (-f $fn) {
+ _try_path($self, $fn);
+ } else {
+ warn "not a file: $fn\n";
+ }
+ }
+ closedir $dh;
+ }
+}
+
+1;
diff --git a/script/public-inbox-watch b/script/public-inbox-watch
new file mode 100755
index 0000000..42ae55a
--- /dev/null
+++ b/script/public-inbox-watch
@@ -0,0 +1,16 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use PublicInbox::WatchMaildir;
+use PublicInbox::Config;
+my $config = PublicInbox::Config->new;
+my $watch_md = PublicInbox::WatchMaildir->new($config);
+if ($watch_md) {
+ my $scan = sub { $watch_md->scan };
+ $SIG{USR1} = $scan;
+ $SIG{ALRM} = sub { $SIG{ALRM} = 'DEFAULT'; $scan->() };
+ alarm(1);
+ $watch_md->watch;
+}
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/2] watch: quiet down rejected header matches
2016-06-17 0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
@ 2016-06-17 1:25 ` Eric Wong
0 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2016-06-17 1:25 UTC (permalink / raw)
To: meta
People may use this directive because they prefer to merge
several mailing lists into one local mailbox, so there may
be many messages which do not match the watched header, and
we should not needlessly clutter logs with a warning for
each of them.
---
lib/PublicInbox/WatchMaildir.pm | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b23556a..3536375 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -86,10 +86,7 @@ sub _try_path {
my $wm = $inbox->{-watchheader};
if ($wm) {
my $v = $mime->header_obj->header_raw($wm->[0]);
- unless ($v && $v =~ $wm->[1]) {
- warn "$wm->[0] failed to match $wm->[1]\n";
- return;
- }
+ return unless ($v && $v =~ $wm->[1]);
}
my $f = $inbox->{filter};
if ($f && $f =~ /::/) {
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-06-17 1:25 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-17 0:41 [PATCH 0/2] introduce public-inbox-watch Eric Wong
2016-06-17 0:41 ` [PATCH 1/2] filter: split out scrub method from delivery Eric Wong
2016-06-17 0:41 ` [PATCH 2/2] watch: introduce watch directive Eric Wong
2016-06-17 1:25 ` [PATCH 3/2] watch: quiet down rejected header matches Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).