From 25f118cab79ec613abd24a95b097c51d4a2c7b0b Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Tue, 3 Aug 2021 23:50:37 +0200 Subject: [PATCH 4/4] Use wcwidth to compute the textual port column The wcwidth function computes the number of columns that a character should span. By using it in the textual port column computation, we increase the precision of error reporting [1]. [1] https://www.gnu.org/prep/standards/html_node/Errors.html#Errors --- doc/ref/api-io.texi | 4 +- libguile/ports.c | 28 ++++++++----- module/ice-9/suspendable-ports.scm | 43 +++++++++----------- test-suite/tests/ports.test | 65 +++++++++++++++++++++++++++--- 4 files changed, 101 insertions(+), 39 deletions(-) diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi index 87d4af496..70c7fdc61 100644 --- a/doc/ref/api-io.texi +++ b/doc/ref/api-io.texi @@ -503,7 +503,9 @@ The @code{put-string} procedure returns an unspecified value. Textual ports have a textual position associated with them: a line and a column. Reading in characters or writing them out advances the line and -the column appropriately. +the column appropriately. The textual position of a port increases +monotonically, and printable ASCII characters increase the position +strictly. @deffn {Scheme Procedure} port-column port @deffnx {Scheme Procedure} port-line port diff --git a/libguile/ports.c b/libguile/ports.c index c25c20709..d1d07819c 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef HAVE_IO_H #include @@ -1704,13 +1705,8 @@ update_port_position (SCM position, scm_t_wchar c) switch (c) { - case '\a': case EOF: break; - case '\b': - if (column > 0) - scm_port_position_set_column (position, scm_from_int (column - 1)); - break; case '\n': { long line = scm_to_long (scm_port_position_line (position)); @@ -1718,15 +1714,29 @@ update_port_position (SCM position, scm_t_wchar c) scm_port_position_set_column (position, SCM_INUM0); } break; - case '\r': - scm_port_position_set_column (position, SCM_INUM0); - break; case '\t': scm_port_position_set_column (position, scm_from_int (column + 8 - column % 8)); break; default: - scm_port_position_set_column (position, scm_from_int (column + 1)); + { + int width = 1; + if (c < 32) + { + /* non-printable ASCII character */ + width = 0; + } + if (c >= 128) + { + /* non-ASCII character */ + width = wcwidth (c); + if (width == -1) + { + width = 1; + } + } + scm_port_position_set_column (position, scm_from_int (column + width)); + } break; } } diff --git a/module/ice-9/suspendable-ports.scm b/module/ice-9/suspendable-ports.scm index a823f1d37..73e80afb4 100644 --- a/module/ice-9/suspendable-ports.scm +++ b/module/ice-9/suspendable-ports.scm @@ -559,29 +559,26 @@ (lambda (buf bv cur buffered) (slow-path)))) (define-inlinable (advance-port-position! pos char) - ;; FIXME: this cond is a speed hack; really we should just compile - ;; `case' better. - (cond - ;; FIXME: char>? et al should compile well. - ((<= (char->integer #\space) (char->integer char)) - (set-port-position-column! pos (1+ (port-position-column pos)))) - (else - (case char - ((#\alarm) #t) ; No change. - ((#\backspace) - (let ((col (port-position-column pos))) - (when (> col 0) - (set-port-position-column! pos (1- col))))) - ((#\newline) - (set-port-position-line! pos (1+ (port-position-line pos))) - (set-port-position-column! pos 0)) - ((#\return) - (set-port-position-column! pos 0)) - ((#\tab) - (let ((col (port-position-column pos))) - (set-port-position-column! pos (- (+ col 8) (remainder col 8))))) - (else - (set-port-position-column! pos (1+ (port-position-column pos)))))))) + (if (>= (char->integer char) 128) + ;; non-ASCII character + (let ((width (wcwidth char))) + (set-port-position-column! + pos + (+ (port-position-column pos) + (if (= width -1) 1 width)))) + ;; ASCII character + (case char + ((#\newline) + (set-port-position-line! pos (1+ (port-position-line pos))) + (set-port-position-column! pos 0)) + ((#\tab) + (let ((col (port-position-column pos))) + (set-port-position-column! pos (- (+ col 8) (remainder col 8))))) + (else + ;; non-printable ASCII characters have 0 width + (when (>= (char->integer char) 32) + ;; printable ASCII character + (set-port-position-column! pos (1+ (port-position-column pos)))))))) (define* (read-char #:optional (port (current-input-port))) (define (finish buf char) diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index cd87640ab..276e3c4de 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -1210,7 +1210,7 @@ (pass-if "x\\x08 backspace" (let ((port (open-output-string))) (display "x\x08" port) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "\\n" (let ((port (open-output-string))) @@ -1230,7 +1230,7 @@ (pass-if "x\\r" (let ((port (open-output-string))) (display "x\r" port) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "\\t" (let ((port (open-output-string))) @@ -1240,7 +1240,31 @@ (pass-if "x\\t" (let ((port (open-output-string))) (display "x\t" port) - (= 8 (port-column port))))) + (= 8 (port-column port)))) + + (pass-if "漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-output-string))) + (display "漢字" port) + (= 2 (port-column port)))) + + (pass-if "x漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-output-string))) + (display "x漢字" port) + (= 3 (port-column port)))) + + (pass-if "漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-output-string))) + (display "漢字" port) + (= 4 (port-column port)))) + + (pass-if "x漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-output-string))) + (display "x漢字" port) + (= 5 (port-column port))))) (with-test-prefix "input" @@ -1267,7 +1291,7 @@ (pass-if "x\\x08 backspace" (let ((port (open-input-string "x\x08"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "\\n" (let ((port (open-input-string "\n"))) @@ -1287,7 +1311,7 @@ (pass-if "x\\r" (let ((port (open-input-string "x\r"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "\\t" (let ((port (open-input-string "\t"))) @@ -1297,7 +1321,36 @@ (pass-if "x\\t" (let ((port (open-input-string "x\t"))) (while (not (eof-object? (read-char port)))) - (= 8 (port-column port)))))) + (= 8 (port-column port)))) + + (pass-if "漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-input-string "漢字"))) + (while (not (eof-object? (read-char port)))) + ;; There are 2 characters, but wcwidth failed so it + ;; returned 2. + (= 2 (port-column port)))) + + (pass-if "x漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-input-string "x漢字"))) + (while (not (eof-object? (read-char port)))) + (= 3 (port-column port)))) + + (pass-if "漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-input-string "漢字"))) + (while (not (eof-object? (read-char port)))) + (setlocale LC_CTYPE "C") + ;; there are only 2 characters, but 4 columns + (= 4 (port-column port)))) + + (pass-if "x漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-input-string "x漢字"))) + (while (not (eof-object? (read-char port)))) + (setlocale LC_CTYPE "C") + (= 5 (port-column port)))))) (with-test-prefix "port-line" -- 2.32.0