From 28c420afab6a0944a192c30ff2d5d9e40c88f14f Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Mon, 23 Dec 2024 13:38:51 -0800
Subject: [PATCH] Avoid U+FFFD in commit messages

* build-aux/git-hooks/commit-msg: Also check against U+FFFD
REPLACEMENT CHARACTER in commit messages.
---
 build-aux/git-hooks/commit-msg | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/build-aux/git-hooks/commit-msg b/build-aux/git-hooks/commit-msg
index 1eb2560bba2..dace4c7fb66 100755
--- a/build-aux/git-hooks/commit-msg
+++ b/build-aux/git-hooks/commit-msg
@@ -31,6 +31,8 @@
 # Use U+00A2 CENT SIGN to test whether the locale works.
 cent_sign_utf8_format='\302\242\n'
 cent_sign=`printf "$cent_sign_utf8_format"`
+replacement_character_utf8_format='\357\277\275\n'
+replacement_character=`printf "$replacement_character_utf8_format"`
 print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
 at_sign=`$awk "$print_at_sign" /dev/null`
 if test "$at_sign" != @; then
@@ -44,7 +46,12 @@ at_sign=
 fi
 
 # Check the log entry.
-exec $awk -v at_sign="$at_sign" -v cent_sign="$cent_sign" -v file="$1" '
+exec $awk \
+     -v at_sign="$at_sign" \
+     -v cent_sign="$cent_sign" \
+     -v file="$1" \
+     -v replacement_character="$replacement_character" \
+'
   BEGIN {
     # These regular expressions assume traditional Unix unibyte behavior.
     # They are needed for old or broken versions of awk, e.g.,
@@ -137,6 +144,10 @@ at_sign=
     print "Unprintable character in commit message"
     status = 1
   }
+  $0 ~ replacement_character {
+    print "Replacement character in commit message"
+    status = 1
+  }
 
   END {
     if (nlines == 0) {
-- 
2.45.2