* [PATCH 2/2] allow admins to configure non-obfuscated addresses/domains
2017-06-23 22:34 [PATCH 0/2] selective obfuscation Eric Wong
2017-06-23 22:34 ` [PATCH 1/2] config: assume lists have multiple addresses Eric Wong
@ 2017-06-23 22:34 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2017-06-23 22:34 UTC (permalink / raw)
To: meta
We will also treat all known list addresses as non-obfuscated.
By setting publicinbox.noObfuscate in ~/.public-inbox/config,
admins can disable address obfuscation on a per-domain or
per-address basis.
---
MANIFEST | 1 +
lib/PublicInbox/Config.pm | 35 ++++++++++++++++++++++++++--
lib/PublicInbox/Hval.pm | 15 +++++++++++-
lib/PublicInbox/SearchView.pm | 9 ++++----
lib/PublicInbox/View.pm | 53 +++++++++++++++++++++++--------------------
t/config.t | 28 +++++++++++++++++++++++
t/hval.t | 33 +++++++++++++++++++++++++++
7 files changed, 142 insertions(+), 32 deletions(-)
create mode 100644 t/hval.t
diff --git a/MANIFEST b/MANIFEST
index 43ac991..983bd1e 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -142,6 +142,7 @@ t/httpd-corner.psgi
t/httpd-corner.t
t/httpd-unix.t
t/httpd.t
+t/hval.t
t/import.t
t/inbox.t
t/init.t
diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm
index 2be485e..369d9bd 100644
--- a/lib/PublicInbox/Config.pm
+++ b/lib/PublicInbox/Config.pm
@@ -20,7 +20,28 @@ sub new {
$self->{-by_addr} ||= {};
$self->{-by_name} ||= {};
$self->{-by_newsgroup} ||= {};
+ $self->{-no_obfuscate} ||= {};
$self->{-limiters} ||= {};
+
+ if (my $no = delete $self->{'publicinbox.noobfuscate'}) {
+ $no = [ $no ] if ref($no) ne 'ARRAY';
+ my @domains;
+ foreach my $n (@$no) {
+ my @n = split(/\s+/, $n);
+ foreach (@n) {
+ if (/\S+@\S+/) { # full address
+ $self->{-no_obfuscate}->{lc $_} = 1;
+ } else {
+ # allow "example.com" or "@example.com"
+ s/\A@//;
+ push @domains, quotemeta($_);
+ }
+ }
+ }
+ my $nod = join('|', @domains);
+ $self->{-no_obfuscate_re} = qr/(?:$nod)\z/i;
+ }
+
$self;
}
@@ -127,6 +148,7 @@ sub git_config_dump {
}
}
close $fh or die "failed to close ($cmd) pipe: $?";
+
\%rv;
}
@@ -151,7 +173,6 @@ sub _fill {
warn "Ignoring $pfx.$k=$v in config, not boolean\n";
}
}
-
# TODO: more arrays, we should support multi-value for
# more things to encourage decentralization
foreach my $k (qw(address altid nntpmirror)) {
@@ -166,11 +187,21 @@ sub _fill {
$rv->{name} = $name;
$rv->{-pi_config} = $self;
$rv = PublicInbox::Inbox->new($rv);
- $self->{-by_addr}->{lc($_)} = $rv foreach @{$rv->{address}};
+ foreach (@{$rv->{address}}) {
+ my $lc_addr = lc($_);
+ $self->{-by_addr}->{$lc_addr} = $rv;
+ $self->{-no_obfuscate}->{$lc_addr} = 1;
+ }
if (my $ng = $rv->{newsgroup}) {
$self->{-by_newsgroup}->{$ng} = $rv;
}
$self->{-by_name}->{$name} = $rv;
+ if ($rv->{obfuscate}) {
+ $rv->{-no_obfuscate} = $self->{-no_obfuscate};
+ $rv->{-no_obfuscate_re} = $self->{-no_obfuscate_re};
+ each_inbox($self, sub {}); # noop to populate -no_obfuscate
+ }
+ $rv
}
1;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 2379b91..8005088 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -91,6 +91,19 @@ sub prurl {
# ․ · and ͺ were also candidates:
# https://public-inbox.org/meta/20170615015250.GA6484@starla/
# However, • was chosen to make copy+paste errors more obvious
-sub obfuscate_addrs ($) { $_[0] =~ s/(\S+@[^\.]+)\./$1•/g }
+sub obfuscate_addrs ($$) {
+ my $ibx = $_[0];
+ my $re = $ibx->{-no_obfuscate_re}; # regex of domains
+ my $addrs = $ibx->{-no_obfuscate}; # { address => 1 }
+ $_[1] =~ s/([\w\.\+=\-]+\@([\w\-]+\.[\w\.\-]+))/
+ my ($addr, $domain) = ($1, $2);
+ if ($addrs->{$addr} || ((defined $re && $domain =~ $re))) {
+ $addr;
+ } else {
+ $addr =~ s!([^\.]+)\.!$1•!g;
+ $addr
+ }
+ /sge;
+}
1;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 777710e..a597403 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -89,7 +89,8 @@ sub mset_summary {
my $pfx = ' ' x $pad;
my $res = \($ctx->{-html_tip});
my $srch = $ctx->{srch};
- my $obfs = $ctx->{-inbox}->{obfuscate};
+ my $ibx = $ctx->{-inbox};
+ my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
foreach my $m ($mset->items) {
my $rank = sprintf("%${pad}d", $m->get_rank + 1);
my $pct = $m->get_percent;
@@ -103,9 +104,9 @@ sub mset_summary {
}
my $s = ascii_html($smsg->subject);
my $f = ascii_html($smsg->from_name);
- if ($obfs) {
- obfuscate_addrs($s);
- obfuscate_addrs($f);
+ if ($obfs_ibx) {
+ obfuscate_addrs($obfs_ibx, $s);
+ obfuscate_addrs($obfs_ibx, $f);
}
my $ts = PublicInbox::View::fmt_ts($smsg->ts);
my $mid = PublicInbox::Hval->new_msgid($smsg->mid)->{href};
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 388207c..e96f773 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -24,12 +24,13 @@ sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD };
sub msg_html {
my ($ctx, $mime) = @_;
my $hdr = $mime->header_obj;
- my $obfs = $ctx->{-inbox}->{obfuscate};
- my $tip = _msg_html_prepare($hdr, $ctx, $obfs);
+ my $ibx = $ctx->{-inbox};
+ my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
+ my $tip = _msg_html_prepare($hdr, $ctx, $obfs_ibx);
PublicInbox::WwwStream->response($ctx, 200, sub {
my ($nr, undef) = @_;
if ($nr == 1) {
- $tip . multipart_text_as_html($mime, '', $obfs) .
+ $tip . multipart_text_as_html($mime, '', $obfs_ibx) .
'</pre><hr>'
} elsif ($nr == 2) {
# fake an EOF if generating the footer fails;
@@ -138,11 +139,11 @@ sub index_entry {
my $root_anchor = $ctx->{root_anchor} || '';
my $irt;
- my $obfs = $ctx->{-obfuscate};
+ my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = "<a\nhref=#e$id\nid=m$id>*</a> ";
$subj = '<b>'.ascii_html($subj).'</b>';
- obfuscate_addrs($subj) if $obfs;
+ obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
$subj = "<u\nid=u>$subj</u>" if $root_anchor eq $id_m;
$rv .= $subj . "\n";
$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
@@ -150,12 +151,12 @@ sub index_entry {
foreach my $f (qw(To Cc)) {
my $dst = _hdr_names_html($hdr, $f);
if ($dst ne '') {
- obfuscate_addrs($dst) if $obfs;
+ obfuscate_addrs($obfs_ibx, $dst) if $obfs_ibx;
push @tocc, "$f: $dst";
}
}
my $from = _hdr_names_html($hdr, 'From');
- obfuscate_addrs($from) if $obfs;
+ obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
$rv .= "From: $from @ "._msg_date($hdr)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_escape($mid_raw) . '/';
@@ -173,7 +174,7 @@ sub index_entry {
$rv .= "\n";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs, $_[0]) });
+ msg_iter($mime, sub { $rv .= add_text_body($mhref, $obfs_ibx, $_[0]) });
# add the footer
$rv .= "\n<a\nhref=#$id_m\nid=e$id>^</a> ".
@@ -319,7 +320,7 @@ sub stream_thread ($$) {
}
return missing_thread($ctx) unless $mime;
- $ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+ $ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
$mime = PublicInbox::MIME->new($mime);
$ctx->{-title_html} = ascii_html($mime->header('Subject'));
$ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
@@ -374,14 +375,14 @@ sub thread_html {
my $rootset = thread_results($msgs);
# reduce hash lookups in pre_thread->skel_dump
- $ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+ my $inbox = $ctx->{-inbox};
+ $ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
walk_thread($rootset, $ctx, *pre_thread);
$skel .= '</pre>';
return stream_thread($rootset, $ctx) unless $ctx->{flat};
# flat display: lazy load the full message from smsg
- my $inbox = $ctx->{-inbox};
my $mime;
while ($mime = shift @$msgs) {
$mime = $inbox->msg_by_smsg($mime) and last;
@@ -406,11 +407,11 @@ sub thread_html {
}
sub multipart_text_as_html {
- my ($mime, $upfx, $obfs) = @_;
+ my ($mime, $upfx, $obfs_ibx) = @_;
my $rv = "";
# scan through all parts, looking for displayable text
- msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs, $_[0]) });
+ msg_iter($mime, sub { $rv .= add_text_body($upfx, $obfs_ibx, $_[0]) });
$rv;
}
@@ -463,7 +464,7 @@ sub attach_link ($$$$;$) {
}
sub add_text_body {
- my ($upfx, $obfs, $p) = @_;
+ my ($upfx, $obfs_ibx, $p) = @_;
# $p - from msg_iter: [ Email::MIME, depth, @idx ]
my ($part, $depth) = @$p; # attachment @idx is unused
my $ct = $part->content_type || 'text/plain';
@@ -515,10 +516,10 @@ sub add_text_body {
if (@quot) { # ugh, top posted
flush_quote(\$s, $l, \@quot);
- obfuscate_addrs($s) if $obfs;
+ obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
$s;
} else {
- obfuscate_addrs($s) if $obfs;
+ obfuscate_addrs($obfs_ibx, $s) if $obfs_ibx;
if ($s =~ /\n\z/s) { # common, last line ends with a newline
$s;
} else { # some editors don't do newlines...
@@ -528,7 +529,7 @@ sub add_text_body {
}
sub _msg_html_prepare {
- my ($hdr, $ctx, $obfs) = @_;
+ my ($hdr, $ctx, $obfs_ibx) = @_;
my $srch = $ctx->{srch} if $ctx;
my $atom = '';
my $rv = "<pre\nid=b>"; # anchor for body start
@@ -547,7 +548,7 @@ sub _msg_html_prepare {
if ($h eq 'From') {
my @n = PublicInbox::Address::names($v->raw);
$title[1] = ascii_html(join(', ', @n));
- obfuscate_addrs($title[1]) if $obfs;
+ obfuscate_addrs($obfs_ibx, $title[1]) if $obfs_ibx;
} elsif ($h eq 'Subject') {
$title[0] = $v->as_html;
if ($srch) {
@@ -557,7 +558,7 @@ sub _msg_html_prepare {
}
}
$v = $v->as_html;
- obfuscate_addrs($v) if $obfs;
+ obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
$rv .= "$h: $v\n";
}
@@ -605,7 +606,8 @@ sub thread_skel {
$sres = load_results($srch, $sres);
# reduce hash lookups in skel_dump
- $ctx->{-obfuscate} = $ctx->{-inbox}->{obfuscate};
+ my $ibx = $ctx->{-inbox};
+ $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
walk_thread(thread_results($sres), $ctx, *skel_dump);
$ctx->{parent_msg} = $parent;
@@ -763,8 +765,8 @@ sub skel_dump {
my $mid = $smsg->{mid};
my $f = ascii_html($smsg->from_name);
- my $obfs = $ctx->{-obfuscate};
- obfuscate_addrs($f) if $obfs;
+ my $obfs_ibx = $ctx->{-obfs_ibx};
+ obfuscate_addrs($obfs_ibx, $f) if $obfs_ibx;
my $d = fmt_ts($smsg->{ts}) . ' ' . indent_for($level) . th_pfx($level);
my $attr = $f;
@@ -799,7 +801,7 @@ sub skel_dump {
$ctx->{seen}->{$h} = 1;
$subj = PublicInbox::Hval->new($subj);
$subj = $subj->as_html;
- obfuscate_addrs($subj) if $obfs;
+ obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
}
my $m;
my $id = '';
@@ -896,7 +898,8 @@ sub dump_topics {
}
my @out;
- my $obfs = $ctx->{-inbox}->{obfuscate};
+ my $ibx = $ctx->{-inbox};
+ my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
# sort by recency, this allows new posts to "bump" old topics...
foreach my $topic (sort { $b->[0] <=> $a->[0] } @$order) {
@@ -928,7 +931,7 @@ sub dump_topics {
my $subj = $ex[$i + 1];
$mid = delete $seen->{$subj};
$subj = ascii_html($subj);
- obfuscate_addrs($subj) if $obfs;
+ obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx;
$href = mid_escape($mid);
$s .= indent_for($level) . TCHILD;
$s .= "<a\nhref=\"$href/T/#u\">$subj</a>\n";
diff --git a/t/config.t b/t/config.t
index 437f1d1..353dac6 100644
--- a/t/config.t
+++ b/t/config.t
@@ -86,4 +86,32 @@ my $tmpdir = tempdir('pi-config-XXXXXX', TMPDIR => 1, CLEANUP => 1);
is($ibx->{nntpserver}, 'news.alt.example.com','per-inbox NNTP server');
}
+# no obfuscate domains
+{
+ my $pfx = "publicinbox.test";
+ my $pfx2 = "publicinbox.foo";
+ my %h = (
+ "$pfx.address" => 'test@example.com',
+ "$pfx.mainrepo" => '/path/to/non/existent',
+ "$pfx2.address" => 'foo@example.com',
+ "$pfx2.mainrepo" => '/path/to/foo',
+ lc("publicinbox.noObfuscate") =>
+ 'public-inbox.org @example.com z@EXAMPLE.com',
+ "$pfx.obfuscate" => 'true', # :<
+ );
+ my %tmp = %h;
+ my $cfg = PublicInbox::Config->new(\%tmp);
+ my $ibx = $cfg->lookup_name('test');
+ my $re = $ibx->{-no_obfuscate_re};
+ like('meta@public-inbox.org', $re,
+ 'public-inbox.org address not to be obfuscated');
+ like('t@example.com', $re, 'example.com address not to be obfuscated');
+ unlike('t@example.comM', $re, 'example.comM address does not match');
+ is_deeply($ibx->{-no_obfuscate}, {
+ 'test@example.com' => 1,
+ 'foo@example.com' => 1,
+ 'z@example.com' => 1,
+ }, 'known addresses populated');
+}
+
done_testing();
diff --git a/t/hval.t b/t/hval.t
new file mode 100644
index 0000000..dcbd838
--- /dev/null
+++ b/t/hval.t
@@ -0,0 +1,33 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::Hval';
+
+my $ibx = {
+ -no_obfuscate_re => qr/(?:example\.com)\z/i,
+ -no_obfuscate => {
+ 'meta@public-inbox.org' => 1,
+ }
+};
+
+my $html = <<'EOF';
+hello@example.comm
+hello@example.com
+meta@public-inbox.org
+test@public-inbox.org
+EOF
+
+PublicInbox::Hval::obfuscate_addrs($ibx, $html);
+
+my $exp = <<'EOF';
+hello@example•comm
+hello@example.com
+meta@public-inbox.org
+test@public-inbox•org
+EOF
+
+is($html, $exp, 'only obfuscated relevant addresses');
+
+done_testing();
--
EW
^ permalink raw reply related [flat|nested] 3+ messages in thread