unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] truncate Message-IDs and References consistently
@ 2018-04-01 23:23 Eric Wong (Contractor, The Linux Foundation)
  0 siblings, 0 replies; only message in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-04-01 23:23 UTC (permalink / raw)
  To: meta

We need to stop ghost messages from generating longer
Message-IDs than Xapian can handle with terms.
---
 lib/PublicInbox/MID.pm | 18 ++++++++----------
 t/v2writable.t         | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 117d3c4..c82e840 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -65,12 +65,6 @@ sub mids ($) {
 			push(@mids, $v);
 		}
 	}
-	foreach my $i (0..$#mids) {
-		next if length($mids[$i]) <= MAX_MID_SIZE;
-		warn "Message-ID: <$mids[$i]> too long, truncating\n";
-		$mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE);
-	}
-
 	uniq_mids(\@mids);
 }
 
@@ -92,10 +86,14 @@ sub uniq_mids ($) {
 	my ($mids) = @_;
 	my @ret;
 	my %seen;
-	foreach (@$mids) {
-		next if $seen{$_};
-		push @ret, $_;
-		$seen{$_} = 1;
+	foreach my $mid (@$mids) {
+		if (length($mid) > MAX_MID_SIZE) {
+			warn "Message-ID: <$mid> too long, truncating\n";
+			$mid = substr($mid, 0, MAX_MID_SIZE);
+		}
+		next if $seen{$mid};
+		push @ret, $mid;
+		$seen{$mid} = 1;
 	}
 	\@ret;
 }
diff --git a/t/v2writable.t b/t/v2writable.t
index 4a7cfb9..7e29ef7 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -235,4 +235,26 @@ EOF
 	$im->done;
 }
 
+{
+	my @warn;
+	my $x = 'x'x250;
+	my $y = 'y'x250;
+	local $SIG{__WARN__} = sub { push @warn, @_ };
+	$mime->header_set('Subject', 'long mid');
+	$mime->header_set('Message-ID', "<$x>");
+	ok($im->add($mime), 'add excessively long Message-ID');
+
+	$mime->header_set('Message-ID', "<$y>");
+	$mime->header_set('References', "<$x>");
+	ok($im->add($mime), 'add excessively long References');
+	$im->barrier;
+
+	my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs};
+	is(2, scalar(@$msgs), 'got both messages');
+	is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
+	is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
+	is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)');
+	$im->done;
+}
+
 done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2018-04-01 23:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-01 23:23 [PATCH] truncate Message-IDs and References consistently Eric Wong (Contractor, The Linux Foundation)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).