From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] truncate Message-IDs and References consistently
Date: Sun, 1 Apr 2018 23:23:44 +0000 [thread overview]
Message-ID: <20180401232344.5317-1-e@80x24.org> (raw)
We need to stop ghost messages from generating Message-IDs
longer than Xapian can handle as terms, so move the truncation
from mids() into uniq_mids().
---
lib/PublicInbox/MID.pm | 18 ++++++++----------
t/v2writable.t | 22 ++++++++++++++++++++++
2 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 117d3c4..c82e840 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -65,12 +65,6 @@ sub mids ($) {
push(@mids, $v);
}
}
- foreach my $i (0..$#mids) {
- next if length($mids[$i]) <= MAX_MID_SIZE;
- warn "Message-ID: <$mids[$i]> too long, truncating\n";
- $mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE);
- }
-
uniq_mids(\@mids);
}
@@ -92,10 +86,14 @@ sub uniq_mids ($) {
my ($mids) = @_;
my @ret;
my %seen;
- foreach (@$mids) {
- next if $seen{$_};
- push @ret, $_;
- $seen{$_} = 1;
+ foreach my $mid (@$mids) {
+ if (length($mid) > MAX_MID_SIZE) {
+ warn "Message-ID: <$mid> too long, truncating\n";
+ $mid = substr($mid, 0, MAX_MID_SIZE);
+ }
+ next if $seen{$mid};
+ push @ret, $mid;
+ $seen{$mid} = 1;
}
\@ret;
}
diff --git a/t/v2writable.t b/t/v2writable.t
index 4a7cfb9..7e29ef7 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -235,4 +235,26 @@ EOF
$im->done;
}
+{
+ my @warn;
+ my $x = 'x'x250;
+ my $y = 'y'x250;
+ local $SIG{__WARN__} = sub { push @warn, @_ };
+ $mime->header_set('Subject', 'long mid');
+ $mime->header_set('Message-ID', "<$x>");
+ ok($im->add($mime), 'add excessively long Message-ID');
+
+ $mime->header_set('Message-ID', "<$y>");
+ $mime->header_set('References', "<$x>");
+ ok($im->add($mime), 'add excessively long References');
+ $im->barrier;
+
+ my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs};
+ is(2, scalar(@$msgs), 'got both messages');
+ is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
+ is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
+ is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)');
+ $im->done;
+}
+
done_testing();
--
EW
reply | other threads: [~2018-04-01 23:23 UTC | newest]
Thread overview: [no followups] expand [flat | nested] | mbox.gz | Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180401232344.5317-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).