* [RFC] mid: filter out 'y', 'n', and email addresses from references()
@ 2019-01-29 7:56 Eric Wong
2019-01-30 1:45 ` Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2019-01-29 7:56 UTC (permalink / raw)
To: meta
Looking at git@vger history, several emails had broken
References and In-Reply-To headers which used <y>, <n>, or
email addresses in place of real Message-IDs.
This was causing too many unrelated messages to be linked
together in the same thread.
---
lib/PublicInbox/MID.pm | 25 +++++++++++++++++++------
t/mid.t | 4 ++++
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index cd56f27..7f1ab15 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -10,6 +10,7 @@ our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
mids references/;
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
+require PublicInbox::Address;
use constant {
MID_MAX => 40, # SHA-1 hex length # TODO: get rid of this
MAX_MID_SIZE => 244, # max term size (Xapian limitation) - length('Q')
@@ -79,22 +80,34 @@ sub references ($) {
push(@mids, ($v =~ /<([^>]+)>/sg));
}
}
- uniq_mids(\@mids);
+
+ # old versions of git-send-email would prompt users for
+ # In-Reply-To and users' muscle memory would use 'y' or 'n'
+ # as responses:
+ my %addr = ( y => 1, n => 1 );
+
+ foreach my $f (qw(To From Cc)) {
+ my @v = $hdr->header_raw($f);
+ foreach my $v (@v) {
+ $addr{$_} = 1 for (PublicInbox::Address::emails($v));
+ }
+ }
+ uniq_mids(\@mids, \%addr);
}
-sub uniq_mids ($) {
- my ($mids) = @_;
+sub uniq_mids ($;$) {
+ my ($mids, $seen) = @_;
my @ret;
- my %seen;
+ $seen ||= {};
foreach my $mid (@$mids) {
$mid =~ tr/\n\t\r//d;
if (length($mid) > MAX_MID_SIZE) {
warn "Message-ID: <$mid> too long, truncating\n";
$mid = substr($mid, 0, MAX_MID_SIZE);
}
- next if $seen{$mid};
+ next if $seen->{$mid};
push @ret, $mid;
- $seen{$mid} = 1;
+ $seen->{$mid} = 1;
}
\@ret;
}
diff --git a/t/mid.t b/t/mid.t
index 8c307c8..69a8a70 100644
--- a/t/mid.t
+++ b/t/mid.t
@@ -36,6 +36,10 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
$mime->header_set('Message-ID', "<hello\tworld>");
is_deeply(mids($mime->header_obj), ['helloworld'],
'drop \t in Message-ID');
+
+ $mime->header_set('To', 'u@example.com');
+ $mime->header_set('References', '<hello> <world> <n> <u@example.com>');
+ is_deeply(references($mime->header_obj), [qw(hello world)]);
}
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-01-30 1:45 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-29 7:56 [RFC] mid: filter out 'y', 'n', and email addresses from references() Eric Wong
2019-01-30 1:45 ` Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).