From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 75A371F63E for ; Tue, 31 Jan 2023 10:38:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1675161482; bh=G6K061oWiDpr3e8R4FudsFWq1tVBvSFbqr8o25nLybw=; h=From:To:Subject:Date:From; b=0Dy49zVVL4KwSn9yyTD43FcY2foNa/yzunl3OYtYFf0EYh3e2AxETQpeCzbneHlur YPynhiAZwbuabb1LeAQufAWeLeRij+EwjRbAM3j/0TOIrJO4Y95ib8UKFNCpTZgPmc n6cP/ZMqNF6VJ7X4gdAHcXLsfYGZ2kkn+OUQcnZU= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: diff: fix encoding problems when showing diff Date: Tue, 31 Jan 2023 10:37:32 +0000 Message-Id: <20230131103732.2654245-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We need to use the utf8 layer when writing files to be diffed, and utf8::decode the `git diff' output. Furthermore, do the CRLF > LF conversion early to avoid showing CRLF vs LF differences in the diff, since that doesn't matter to MUAs (nor our normal HTML views) --- lib/PublicInbox/MailDiff.pm | 8 +++++--- t/psgi_v2.t | 40 ++++++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm index a0ecef9f..7511144c 100644 --- a/lib/PublicInbox/MailDiff.pm +++ b/lib/PublicInbox/MailDiff.pm @@ -19,8 +19,10 @@ sub write_part { # Eml->each_part callback my $ct = $part->content_type || 'text/plain'; my ($s, $err) = msg_part_text($part, $ct); my $sfx = defined($s) ? 'txt' : 'bin'; - open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!"; - print $fh ($s // $part->body) or die "print $!"; + $s //= $part->body; + $s =~ s/\r+\n/\n/sg; + open my $fh, '>:utf8', "$self->{curdir}/$idx.$sfx" or die "open: $!"; + print $fh $s or die "print $!"; close $fh or die "close $!"; } @@ -66,9 +68,9 @@ sub next_smsg ($) { sub emit_msg_diff { my ($bref, $self) = @_; # bref is `git diff' output # will be escaped to `•' in HTML + utf8::decode($$bref); $self->{ctx}->{ibx}->{obfuscate} and obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}"); - $$bref =~ s/\r+\n/\n/sg; print { $self->{ctx}->{zfh} } '
' if $self->{nr} > 1;
 	flush_diff($self->{ctx}, $bref);
 	next_smsg($self);
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index f709c3c7..5b197a9f 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -13,6 +13,36 @@ require_mods(qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
 use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
 use_ok 'PublicInbox::WWW';
 my ($tmpdir, $for_destroy) = tmpdir();
+my $enc_dup = 'ref-20150309094050.GO3427@x1.example';
+
+my $dibx = create_inbox 'v2-dup', version => 2, indexlevel => 'medium',
+			tmpdir => "$tmpdir/dup", sub {
+	my ($im, $ibx) = @_;
+	my $common = <<"";
+Date: Mon, 9 Mar 2015 09:40:50 +0000
+From: x\@example.com
+To: y\@example.com
+Subject: re
+Message-ID: <$enc_dup>
+MIME-Version: 1.0
+
+	$im->add(PublicInbox::Eml->new($common.<add(PublicInbox::Eml->new($common.<new(<<'EOF');
 From oldbug-pre-a0c07cba0e5d8b6a Fri Oct  2 00:00:00 1993
 From: a@example.com
@@ -53,6 +83,9 @@ my $cfgpath = "$ibx->{inboxdir}/pi_config";
 [publicinbox "v2test"]
 	inboxdir = $ibx->{inboxdir}
 	address = $ibx->{-primary_address}
+[publicinbox "dup"]
+	inboxdir = $dibx->{inboxdir}
+	address = $dibx->{-primary_address}
 EOF
 	close $fh or BAIL_OUT;
 }
@@ -221,8 +254,13 @@ my $client1 = sub {
 	}
 	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
 
-	$res = $cb->(GET('/v2test/a-mid@b/d/'));
+	$res = $cb->(GET("/dup/$enc_dup/d/"));
 	is($res->code, 200, '/d/ (diff) endpoint works');
+	$raw = $res->content;
+	like($raw, qr! cr_mismatch\n!s,
+		'cr_mismatch is only diff context');
+	like($raw, qr!>\-pipe !s, 'pipe diff del line');
+	like($raw, qr!>\+pipe !s, 'pipe diff ins line');
 };
 
 test_psgi(sub { $www->call(@_) }, $client1);