* [PATCH] viewvcs: handle non-UTF-8 commit message
@ 2023-02-21 11:17 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2023-02-21 11:17 UTC (permalink / raw)
To: meta
Back in the old days, git didn't store commit encodings
and allowed messages in various encodings to enter history.
Assuming such a commit is UTF-8 trips up s/// operations
on buffers read with the `:utf8' PerlIO layer. So clear
Perl's internal UTF-8 flag if we end up with something
which isn't valid UTF-8.
An example is commit 7eb93c89651c47c8095d476251f2e4314656b292
in git.git ([PATCH] Simplify git script, 2005-09-07).
---
lib/PublicInbox/ViewVCS.pm | 4 +++-
t/solver_git.t | 40 +++++++++++++++++++++++++++++++++++---
xt/solver.t | 1 +
3 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 0fb77c06..964b7345 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -157,9 +157,11 @@ sub show_commit_start { # ->psgi_qx callback
}
my $patchid = (split(/ /, $$bref))[0]; # ignore commit
$ctx->{-q_value_html} = "patchid:$patchid" if defined $patchid;
- open my $fh, '<:utf8', "$ctx->{-tmp}/h" or
+ open my $fh, '<', "$ctx->{-tmp}/h" or
die "open $ctx->{-tmp}/h: $!";
chop(my $buf = do { local $/ = "\0"; <$fh> });
+ utf8::decode($buf);
+ utf8::valid($buf) or utf8::encode($buf); # non-UTF-8 commits exist
chomp $buf;
my ($P, $p);
($P, $p, @{$ctx->{cmt_info}}) = split(/\n/, $buf, 9);
diff --git a/t/solver_git.t b/t/solver_git.t
index c65d9785..e8d9feb9 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -218,14 +218,13 @@ SKIP: {
my %oid; # (small|big) => OID
my $lk = bless { lock_path => $l }, 'PublicInbox::Lock';
my $acq = $lk->lock_for_scope;
- my $stamp = "$binfoo/stamp";
+ my $stamp = "$binfoo/stamp-";
if (open my $fh, '<', $stamp) {
%oid = map { chomp; split(/=/, $_) } (<$fh>);
} else {
PublicInbox::Import::init_bare($binfoo);
my $cmd = [ qw(git hash-object -w --stdin) ];
my $env = { GIT_DIR => $binfoo };
- open my $fh, '>', "$stamp.$$" or BAIL_OUT;
while (my ($label, $size) = each %bin) {
pipe(my ($rin, $win)) or BAIL_OUT;
my $rout = popen_rd($cmd , $env, { 0 => $rin });
@@ -234,9 +233,33 @@ SKIP: {
close $win or BAIL_OUT;
chomp(my $x = <$rout>);
close $rout or BAIL_OUT "$?";
- print $fh "$label=$x\n" or BAIL_OUT;
$oid{$label} = $x;
}
+
+ open my $null, '<', '/dev/null' or xbail "open /dev/null: $!";
+ my $t = xqx([qw(git mktree)], $env, { 0 => $null });
+ xbail "mktree: $?" if $?;
+ chomp($t);
+ my $non_utf8 = "K\x{e5}g";
+ $env->{GIT_AUTHOR_NAME} = $non_utf8;
+ $env->{GIT_AUTHOR_EMAIL} = 'e@example.com';
+ $env->{GIT_COMMITTER_NAME} = $env->{GIT_AUTHOR_NAME};
+ $env->{GIT_COMMITTER_EMAIL} = $env->{GIT_AUTHOR_EMAIL};
+ my $in = \"$non_utf8\n\nK\x{e5}g\n";
+ my $c = xqx([qw(git commit-tree), $t], $env, { 0 => $in });
+ xbail "commit-tree: $?" if $?;
+ chomp($c);
+ $oid{'iso-8859-1'} = $c;
+
+ $c = xqx([qw(git commit-tree -p), $c, $t], $env, { 0 => $in });
+ xbail "commit-tree: $?" if $?;
+ chomp($c);
+ $oid{'8859-parent'} = $c;
+
+ open my $fh, '>', "$stamp.$$" or BAIL_OUT;
+ while (my ($k, $v) = each %oid) {
+ print $fh "$k=$v\n" or xbail "print: $!";
+ }
close $fh or BAIL_OUT;
rename("$stamp.$$", $stamp) or BAIL_OUT;
}
@@ -331,6 +354,17 @@ EOF
open STDERR, '>&', $olderr or xbail "open: $!";
is($res->code, 200, 'coderepo summary (binfoo)');
ok(!-s "$tmpdir/stderr.log");
+
+ $res = $cb->(GET("/binfoo/$oid{'iso-8859-1'}/s/"));
+ is($res->code, 200, 'ISO-8859-1 commit');
+ like($res->content, qr/Kåg/, 'ISO-8859-1 commit message');
+ ok(!-s "$tmpdir/stderr.log", 'nothing in stderr');
+
+ $res = $cb->(GET("/binfoo/$oid{'8859-parent'}/s/"));
+ is($res->code, 200, 'commit w/ ISO-8859-parent');
+ like($res->content, qr/Kåg/, 'ISO-8859-1 commit message');
+ ok(!-s "$tmpdir/stderr.log", 'nothing in stderr');
+
$res = $cb->(GET('/public-inbox/'));
is($res->code, 200, 'coderepo summary (public-inbox)');
diff --git a/xt/solver.t b/xt/solver.t
index 1b0af3d8..1f004bf5 100644
--- a/xt/solver.t
+++ b/xt/solver.t
@@ -30,6 +30,7 @@ my $todo = {
'96f1c7f/s/', # TODO: b=contrib/completion/git-completion.bash
'b76f2c0/s/?b=po/zh_CN.po',
'c2f3bf071ee90b01f2d629921bb04c4f798f02fa/s/', # tag
+ '7eb93c89651c47c8095d476251f2e4314656b292/s/', # non-UTF-8
],
};
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2023-02-21 11:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-21 11:17 [PATCH] viewvcs: handle non-UTF-8 commit message Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).