unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] truncate Message-IDs and References consistently
Date: Sun,  1 Apr 2018 23:23:44 +0000	[thread overview]
Message-ID: <20180401232344.5317-1-e@80x24.org> (raw)

We need to stop ghost messages from ending up with Message-IDs
longer than Xapian can handle as terms.
---
 lib/PublicInbox/MID.pm | 18 ++++++++----------
 t/v2writable.t         | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 117d3c4..c82e840 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -65,12 +65,6 @@ sub mids ($) {
 			push(@mids, $v);
 		}
 	}
-	foreach my $i (0..$#mids) {
-		next if length($mids[$i]) <= MAX_MID_SIZE;
-		warn "Message-ID: <$mids[$i]> too long, truncating\n";
-		$mids[$i] = substr($mids[$i], 0, MAX_MID_SIZE);
-	}
-
 	uniq_mids(\@mids);
 }
 
@@ -92,10 +86,14 @@ sub uniq_mids ($) {
 	my ($mids) = @_;
 	my @ret;
 	my %seen;
-	foreach (@$mids) {
-		next if $seen{$_};
-		push @ret, $_;
-		$seen{$_} = 1;
+	foreach my $mid (@$mids) {
+		if (length($mid) > MAX_MID_SIZE) {
+			warn "Message-ID: <$mid> too long, truncating\n";
+			$mid = substr($mid, 0, MAX_MID_SIZE);
+		}
+		next if $seen{$mid};
+		push @ret, $mid;
+		$seen{$mid} = 1;
 	}
 	\@ret;
 }
diff --git a/t/v2writable.t b/t/v2writable.t
index 4a7cfb9..7e29ef7 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -235,4 +235,26 @@ EOF
 	$im->done;
 }
 
+{
+	my @warn;
+	my $x = 'x'x250;
+	my $y = 'y'x250;
+	local $SIG{__WARN__} = sub { push @warn, @_ };
+	$mime->header_set('Subject', 'long mid');
+	$mime->header_set('Message-ID', "<$x>");
+	ok($im->add($mime), 'add excessively long Message-ID');
+
+	$mime->header_set('Message-ID', "<$y>");
+	$mime->header_set('References', "<$x>");
+	ok($im->add($mime), 'add excessively long References');
+	$im->barrier;
+
+	my $msgs = $ibx->search->reopen->get_thread('x'x244)->{msgs};
+	is(2, scalar(@$msgs), 'got both messages');
+	is($msgs->[0]->{mid}, 'x'x244, 'stored truncated mid');
+	is($msgs->[1]->{references}, '<'.('x'x244).'>', 'stored truncated ref');
+	is($msgs->[1]->{mid}, 'y'x244, 'stored truncated mid(2)');
+	$im->done;
+}
+
 done_testing();
-- 
EW


                 reply	other threads:[~2018-04-01 23:23 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180401232344.5317-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).