* [REJECT] add filter for RubyLang lists
@ 2017-06-21 23:33 Eric Wong
2017-06-22 0:31 ` Eric Wong
2017-06-22 7:11 ` [PATCH v2] " Eric Wong
0 siblings, 2 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-21 23:33 UTC (permalink / raw)
To: meta
Unfortunately, it appears we have to reject this and instead add
support for filtering at View time(*), due to DKIM signatures in
messages from ruby-lang.org.
(*) which may not be worth it
---
MANIFEST | 1 +
lib/PublicInbox/AltId.pm | 4 +--
lib/PublicInbox/Filter/RubyLang.pm | 59 ++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/WatchMaildir.pm | 2 +-
4 files changed, 63 insertions(+), 3 deletions(-)
create mode 100644 lib/PublicInbox/Filter/RubyLang.pm
diff --git a/MANIFEST b/MANIFEST
index d0b7f2b..c7c4a92 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
lib/PublicInbox/Feed.pm
lib/PublicInbox/Filter/Base.pm
lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/RubyLang.pm
lib/PublicInbox/Filter/SubjectTag.pm
lib/PublicInbox/Filter/Vger.pm
lib/PublicInbox/GetlineBody.pm
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6fdc3a2..73fecd5 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -9,7 +9,7 @@ use URI::Escape qw(uri_unescape);
# spec: TYPE:PREFIX:param1=value1&param2=value2&...
# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
sub new {
- my ($class, $inbox, $spec) = @_;
+ my ($class, $inbox, $spec, $writable) = @_;
my ($type, $prefix, $query) = split(/:/, $spec, 3);
$type eq 'serial' or die "non-serial not supported, yet\n";
@@ -25,7 +25,7 @@ sub new {
$f = "$inbox->{mainrepo}/public-inbox/$f";
}
bless {
- mm_alt => PublicInbox::Msgmap->new_file($f),
+ mm_alt => PublicInbox::Msgmap->new_file($f, $writable),
xprefix => 'X'.uc($prefix),
}, $class;
}
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
new file mode 100644
index 0000000..a0e6d7b
--- /dev/null
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -0,0 +1,59 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for lists.ruby-lang.org trailers
+package PublicInbox::Filter::RubyLang;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l1 = qr/Unsubscribe:\s
+ <mailto:ruby-\w+-request\@ruby-lang\.org\?subject=unsubscribe>/x;
+my $l2 = qr{<http://lists\.ruby-lang\.org/cgi-bin/mailman/options/ruby-\w+>};
+
+sub new {
+ my ($class, %opts) = @_;
+ my $altid = delete $opts{-altid};
+ my $self = $class->SUPER::new(%opts);
+ # altid = serial:ruby-core:file=msgmap.sqlite3
+ if ($altid) {
+ require PublicInbox::MID; # mid_clean
+ my $ibx = $self->{-inbox};
+ require PublicInbox::AltId;
+ $self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
+ }
+ $self;
+}
+
+sub scrub {
+ my ($self, $mime) = @_;
+ # no msg_iter here, that is only for read-only access
+ $mime->walk_parts(sub {
+ my ($part) = $_[0];
+ my $ct = $part->content_type;
+ if (!$ct || $ct =~ m{\btext/plain\b}i) {
+ my $s = eval { $part->body_str };
+ if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
+ $part->body_str_set($s);
+ }
+ }
+ });
+ my $altid = $self->{-altid};
+ if ($altid) {
+ my $hdr = $mime->header_obj;
+ my $n = $hdr->header_raw('X-Mail-Count');
+ my $mid = $hdr->header_raw('Message-ID');
+ if (defined $n && defined $mid && $n =~ /\A\s*\d+\s*\z/) {
+ $mid = PublicInbox::MID::mid_clean($mid);
+ $altid->{mm_alt}->mid_set($n, $mid);
+ }
+ }
+ $self->ACCEPT($mime);
+}
+
+sub delivery {
+ my ($self, $mime) = @_;
+ $self->scrub($mime);
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c436742..8588f16 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -242,7 +242,7 @@ sub _scrubber_for {
my ($inbox) = @_;
my $f = $inbox->{filter};
if ($f && $f =~ /::/) {
- my @args;
+ my @args = (-inbox => $inbox);
# basic line splitting, only
# Perhaps we can have proper quote splitting one day...
($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
--
EW
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [REJECT] add filter for RubyLang lists
2017-06-21 23:33 [REJECT] add filter for RubyLang lists Eric Wong
@ 2017-06-22 0:31 ` Eric Wong
2017-06-22 7:11 ` [PATCH v2] " Eric Wong
1 sibling, 0 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-22 0:31 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> Unfortunately, it appears we have to reject this and instead add
> support for filtering at View time(*), due to DKIM signatures in
> messages from ruby-lang.org.
Nevermind, I'll apply this patch. DKIM signatures are only
added by Redmine, and invalid by the time it hits ruby-core.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v2] add filter for RubyLang lists
2017-06-21 23:33 [REJECT] add filter for RubyLang lists Eric Wong
2017-06-22 0:31 ` Eric Wong
@ 2017-06-22 7:11 ` Eric Wong
2017-06-22 22:02 ` [PATCH 1/2] filter/rubylang: reuse altid entry from inbox object Eric Wong
1 sibling, 1 reply; 5+ messages in thread
From: Eric Wong @ 2017-06-22 7:11 UTC (permalink / raw)
To: meta
Unfortunately, it appears we have to reject this and instead add
support for filtering at View time(*), due to DKIM signatures in
messages from ruby-lang.org.
(*) which may not be worth it
---
MANIFEST | 1 +
lib/PublicInbox/AltId.pm | 4 +--
lib/PublicInbox/Filter/RubyLang.pm | 63 ++++++++++++++++++++++++++++++++++++++
lib/PublicInbox/WatchMaildir.pm | 2 +-
4 files changed, 67 insertions(+), 3 deletions(-)
create mode 100644 lib/PublicInbox/Filter/RubyLang.pm
diff --git a/MANIFEST b/MANIFEST
index d0b7f2b..c7c4a92 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -51,6 +51,7 @@ lib/PublicInbox/ExtMsg.pm
lib/PublicInbox/Feed.pm
lib/PublicInbox/Filter/Base.pm
lib/PublicInbox/Filter/Mirror.pm
+lib/PublicInbox/Filter/RubyLang.pm
lib/PublicInbox/Filter/SubjectTag.pm
lib/PublicInbox/Filter/Vger.pm
lib/PublicInbox/GetlineBody.pm
diff --git a/lib/PublicInbox/AltId.pm b/lib/PublicInbox/AltId.pm
index 6fdc3a2..73fecd5 100644
--- a/lib/PublicInbox/AltId.pm
+++ b/lib/PublicInbox/AltId.pm
@@ -9,7 +9,7 @@ use URI::Escape qw(uri_unescape);
# spec: TYPE:PREFIX:param1=value1&param2=value2&...
# Example: serial:gmane:file=/path/to/altmsgmap.sqlite3
sub new {
- my ($class, $inbox, $spec) = @_;
+ my ($class, $inbox, $spec, $writable) = @_;
my ($type, $prefix, $query) = split(/:/, $spec, 3);
$type eq 'serial' or die "non-serial not supported, yet\n";
@@ -25,7 +25,7 @@ sub new {
$f = "$inbox->{mainrepo}/public-inbox/$f";
}
bless {
- mm_alt => PublicInbox::Msgmap->new_file($f),
+ mm_alt => PublicInbox::Msgmap->new_file($f, $writable),
xprefix => 'X'.uc($prefix),
}, $class;
}
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
new file mode 100644
index 0000000..ec4bc32
--- /dev/null
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -0,0 +1,63 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Filter for lists.ruby-lang.org trailers
+package PublicInbox::Filter::RubyLang;
+use base qw(PublicInbox::Filter::Base);
+use strict;
+use warnings;
+
+my $l1 = qr/Unsubscribe:\s
+ <mailto:ruby-\w+-request\@ruby-lang\.org\?subject=unsubscribe>/x;
+my $l2 = qr{<http://lists\.ruby-lang\.org/cgi-bin/mailman/options/ruby-\w+>};
+
+sub new {
+ my ($class, %opts) = @_;
+ my $altid = delete $opts{-altid};
+ my $self = $class->SUPER::new(%opts);
+ # altid = serial:ruby-core:file=msgmap.sqlite3
+ if ($altid) {
+ require PublicInbox::MID; # mid_clean
+ my $ibx = $self->{-inbox};
+ require PublicInbox::AltId;
+ $self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
+ }
+ $self;
+}
+
+sub scrub {
+ my ($self, $mime) = @_;
+ # no msg_iter here, that is only for read-only access
+ $mime->walk_parts(sub {
+ my ($part) = $_[0];
+ my $ct = $part->content_type;
+ if (!$ct || $ct =~ m{\btext/plain\b}i) {
+ my $s = eval { $part->body_str };
+ if (defined $s && $s =~ s/\n?$l1\n$l2\n\z//os) {
+ $part->body_str_set($s);
+ }
+ }
+ });
+ my $altid = $self->{-altid};
+ if ($altid) {
+ my $hdr = $mime->header_obj;
+ my $mid = $hdr->header_raw('Message-ID');
+ unless (defined $mid) {
+ return $self->REJECT('Message-Id missing');
+ }
+ my $n = $hdr->header_raw('X-Mail-Count');
+ if (!defined($n) || $n !~ /\A\s*\d+\s*\z/) {
+ return $self->REJECT('X-Mail-Count not numeric');
+ }
+ $mid = PublicInbox::MID::mid_clean($mid);
+ $altid->{mm_alt}->mid_set($n, $mid);
+ }
+ $self->ACCEPT($mime);
+}
+
+sub delivery {
+ my ($self, $mime) = @_;
+ $self->scrub($mime);
+}
+
+1;
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index c436742..8588f16 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -242,7 +242,7 @@ sub _scrubber_for {
my ($inbox) = @_;
my $f = $inbox->{filter};
if ($f && $f =~ /::/) {
- my @args;
+ my @args = (-inbox => $inbox);
# basic line splitting, only
# Perhaps we can have proper quote splitting one day...
($f, @args) = split(/\s+/, $f) if $f =~ /\s+/;
--
EW
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 1/2] filter/rubylang: reuse altid entry from inbox object
2017-06-22 7:11 ` [PATCH v2] " Eric Wong
@ 2017-06-22 22:02 ` Eric Wong
2017-06-22 22:02 ` [PATCH 2/2] test for PublicInbox::Filter::RubyLang Eric Wong
0 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2017-06-22 22:02 UTC (permalink / raw)
To: meta
This allows users to DRY up their config a bit and avoid
specifying altid twice when reusing the NNTP-centric msgmap
for [ruby-*:\d+] serial numbers.
My current work-in-progress ~/.public-inbox/config entry
for the ruby-core list is:
------8<-------
[publicinbox "ruby-core"]
address = ruby-core@ruby-lang.org
url = //public-inbox.org/ruby-core
mainrepo = /path/to/ruby-core.git
newsgroup = inbox.comp.lang.ruby.core
watchheader = List-Id:<ruby-core.ruby-lang.org>
altid = serial:ruby-core:file=msgmap.sqlite3
watch = maildir:/path/to/Maildir/.INBOX.ruby
filter = PublicInbox::Filter::RubyLang
---
lib/PublicInbox/Filter/RubyLang.pm | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Filter/RubyLang.pm b/lib/PublicInbox/Filter/RubyLang.pm
index ec4bc32..5240e8d 100644
--- a/lib/PublicInbox/Filter/RubyLang.pm
+++ b/lib/PublicInbox/Filter/RubyLang.pm
@@ -15,10 +15,13 @@ sub new {
my ($class, %opts) = @_;
my $altid = delete $opts{-altid};
my $self = $class->SUPER::new(%opts);
+ my $ibx = $self->{-inbox};
# altid = serial:ruby-core:file=msgmap.sqlite3
+ if (!$altid && $ibx && $ibx->{altid}) {
+ $altid ||= $ibx->{altid}->[0];
+ }
if ($altid) {
require PublicInbox::MID; # mid_clean
- my $ibx = $self->{-inbox};
require PublicInbox::AltId;
$self->{-altid} = PublicInbox::AltId->new($ibx, $altid, 1);
}
--
EW
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] test for PublicInbox::Filter::RubyLang
2017-06-22 22:02 ` [PATCH 1/2] filter/rubylang: reuse altid entry from inbox object Eric Wong
@ 2017-06-22 22:02 ` Eric Wong
0 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2017-06-22 22:02 UTC (permalink / raw)
To: meta
This will make it easier to prevent breakage in the future.
---
MANIFEST | 1 +
t/filter_rubylang.t | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+)
create mode 100644 t/filter_rubylang.t
diff --git a/MANIFEST b/MANIFEST
index c7c4a92..43ac991 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -130,6 +130,7 @@ t/fail-bin/spamc
t/feed.t
t/filter_base.t
t/filter_mirror.t
+t/filter_rubylang.t
t/filter_subjecttag.t
t/filter_vger.t
t/git-http-backend.psgi
diff --git a/t/filter_rubylang.t b/t/filter_rubylang.t
new file mode 100644
index 0000000..bfccc35
--- /dev/null
+++ b/t/filter_rubylang.t
@@ -0,0 +1,58 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use File::Temp qw/tempdir/;
+use_ok 'PublicInbox::Filter::RubyLang';
+
+my $f = PublicInbox::Filter::RubyLang->new;
+ok($f, 'created PublicInbox::Filter::RubyLang object');
+my $msg = <<'EOF';
+Subject: test
+
+keep this
+
+Unsubscribe: <mailto:ruby-core-request@ruby-lang.org?subject=unsubscribe>
+<http://lists.ruby-lang.org/cgi-bin/mailman/options/ruby-core>
+EOF
+my $mime = Email::MIME->new($msg);
+my $ret = $f->delivery($mime);
+is($ret, $mime, "delivery successful");
+is($mime->body, "keep this\n", 'normal message filtered OK');
+
+SKIP: {
+ eval 'require DBD::SQLite';
+ skip 'DBD::SQLite missing for altid mapping', 4 if $@;
+ use_ok 'PublicInbox::Inbox';
+ my $git_dir = tempdir('pi-filter_rubylang-XXXXXX',
+ TMPDIR => 1, CLEANUP => 1);
+ is(mkdir("$git_dir/public-inbox"), 1, "created public-inbox dir");
+ my $altid = [ "serial:ruby-core:file=msgmap.sqlite3" ];
+ my $ibx = PublicInbox::Inbox->new({ mainrepo => $git_dir,
+ altid => $altid });
+ $f = PublicInbox::Filter::RubyLang->new(-inbox => $ibx);
+ $msg = <<'EOF';
+X-Mail-Count: 12
+Message-ID: <a@b>
+
+EOF
+ $mime = Email::MIME->new($msg);
+ $ret = $f->delivery($mime);
+ is($ret, $mime, "delivery successful");
+ my $mm = PublicInbox::Msgmap->new($git_dir);
+ is($mm->num_for('a@b'), 12, 'MM entry created based on X-ML-Count');
+
+ $msg = <<'EOF';
+X-Mail-Cout: 12
+Message-ID: <b@b>
+
+EOF
+
+ $mime = Email::MIME->new($msg);
+ $ret = $f->delivery($mime);
+ is($ret, 100, "delivery rejected without X-Mail-Count");
+}
+
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2017-06-22 22:02 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-21 23:33 [REJECT] add filter for RubyLang lists Eric Wong
2017-06-22 0:31 ` Eric Wong
2017-06-22 7:11 ` [PATCH v2] " Eric Wong
2017-06-22 22:02 ` [PATCH 1/2] filter/rubylang: reuse altid entry from inbox object Eric Wong
2017-06-22 22:02 ` [PATCH 2/2] test for PublicInbox::Filter::RubyLang Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).