* [PATCH] www: diff: fix encoding problems when showing diff
@ 2023-01-31 10:37 Eric Wong
2023-01-31 10:43 ` oops on the resend :x Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2023-01-31 10:37 UTC (permalink / raw)
To: meta
We need to use the utf8 layer when writing files to be diffed,
and utf8::decode the `git diff' output. Furthermore, do the
CRLF -> LF conversion early (before the files are written) to
avoid showing CRLF vs LF differences in the diff, since those
don't matter to MUAs (nor to our normal HTML views).
---
lib/PublicInbox/MailDiff.pm | 8 +++++---
t/psgi_v2.t | 40 ++++++++++++++++++++++++++++++++++++-
2 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index a0ecef9f..7511144c 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -19,8 +19,10 @@ sub write_part { # Eml->each_part callback
my $ct = $part->content_type || 'text/plain';
my ($s, $err) = msg_part_text($part, $ct);
my $sfx = defined($s) ? 'txt' : 'bin';
- open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!";
- print $fh ($s // $part->body) or die "print $!";
+ $s //= $part->body;
+ $s =~ s/\r+\n/\n/sg;
+ open my $fh, '>:utf8', "$self->{curdir}/$idx.$sfx" or die "open: $!";
+ print $fh $s or die "print $!";
close $fh or die "close $!";
}
@@ -66,9 +68,9 @@ sub next_smsg ($) {
sub emit_msg_diff {
my ($bref, $self) = @_; # bref is `git diff' output
# will be escaped to `•' in HTML
+ utf8::decode($$bref);
$self->{ctx}->{ibx}->{obfuscate} and
obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}");
- $$bref =~ s/\r+\n/\n/sg;
print { $self->{ctx}->{zfh} } '</pre><hr><pre>' if $self->{nr} > 1;
flush_diff($self->{ctx}, $bref);
next_smsg($self);
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index f709c3c7..5b197a9f 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -13,6 +13,36 @@ require_mods(qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
use_ok 'PublicInbox::WWW';
my ($tmpdir, $for_destroy) = tmpdir();
+my $enc_dup = 'ref-20150309094050.GO3427@x1.example';
+
+my $dibx = create_inbox 'v2-dup', version => 2, indexlevel => 'medium',
+ tmpdir => "$tmpdir/dup", sub {
+ my ($im, $ibx) = @_;
+ my $common = <<"";
+Date: Mon, 9 Mar 2015 09:40:50 +0000
+From: x\@example.com
+To: y\@example.com
+Subject: re
+Message-ID: <$enc_dup>
+MIME-Version: 1.0
+
+ $im->add(PublicInbox::Eml->new($common.<<EOM)) or BAIL_OUT;
+Content-Type: text/plain; charset=utf-8
+Content-Disposition: inline
+Content-Transfer-Encoding: 8bit
+
+cr_mismatch
+pipe \x{e2}\x{94}\x{82} or not
+EOM
+ $im->add(PublicInbox::Eml->new($common.<<EOM)) or BAIL_OUT;
+Content-Type: text/plain; charset="windows-1252"
+Content-Transfer-Encoding: quoted-printable
+
+cr_mismatch\r
+pipe =E2=94=82 or not
+EOM
+};
+
my $eml = PublicInbox::Eml->new(<<'EOF');
From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993
From: a@example.com
@@ -53,6 +83,9 @@ my $cfgpath = "$ibx->{inboxdir}/pi_config";
[publicinbox "v2test"]
inboxdir = $ibx->{inboxdir}
address = $ibx->{-primary_address}
+[publicinbox "dup"]
+ inboxdir = $dibx->{inboxdir}
+ address = $dibx->{-primary_address}
EOF
close $fh or BAIL_OUT;
}
@@ -221,8 +254,13 @@ my $client1 = sub {
}
like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
- $res = $cb->(GET('/v2test/a-mid@b/d/'));
+ $res = $cb->(GET("/dup/$enc_dup/d/"));
is($res->code, 200, '/d/ (diff) endpoint works');
+ $raw = $res->content;
+ like($raw, qr!</span> cr_mismatch\n!s,
+ 'cr_mismatch is only diff context');
+ like($raw, qr!>\-pipe !s, 'pipe diff del line');
+ like($raw, qr!>\+pipe !s, 'pipe diff ins line');
};
test_psgi(sub { $www->call(@_) }, $client1);
^ permalink raw reply related [flat|nested] 2+ messages in thread
* oops on the resend :x
2023-01-31 10:37 [PATCH] www: diff: fix encoding problems when showing diff Eric Wong
@ 2023-01-31 10:43 ` Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2023-01-31 10:43 UTC (permalink / raw)
To: meta
I didn't mean to create test cases on this list :x
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-01-31 10:44 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-31 10:37 [PATCH] www: diff: fix encoding problems when showing diff Eric Wong
2023-01-31 10:43 ` oops on the resend :x Eric Wong
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).