From 050817a7af5c9b402e13886b443554a446b2836c Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Tue, 3 Aug 2021 23:50:37 +0200 Subject: [PATCH 4/4] Use wcwidth to compute the textual port column The wcwidth function computes the number of columns that a character should span. By using it in the textual port column computation, we increase the precision of error reporting [1]. [1] https://www.gnu.org/prep/standards/html_node/Errors.html#Errors --- doc/ref/api-io.texi | 3 +- libguile/ports.c | 22 ++++---- module/ice-9/suspendable-ports.scm | 38 ++++++-------- test-suite/tests/ports.test | 81 ++++++++++++++++++++++++------ 4 files changed, 97 insertions(+), 47 deletions(-) diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi index 87d4af496..b5c95eef1 100644 --- a/doc/ref/api-io.texi +++ b/doc/ref/api-io.texi @@ -503,7 +503,8 @@ The @code{put-string} procedure returns an unspecified value. Textual ports have a textual position associated with them: a line and a column. Reading in characters or writing them out advances the line and -the column appropriately. +the column appropriately. The textual position of a port increases +monotonically, and ASCII characters increase the position strictly. @deffn {Scheme Procedure} port-column port @deffnx {Scheme Procedure} port-line port diff --git a/libguile/ports.c b/libguile/ports.c index c25c20709..0582f7d3a 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef HAVE_IO_H #include @@ -1704,13 +1705,8 @@ update_port_position (SCM position, scm_t_wchar c) switch (c) { - case '\a': case EOF: break; - case '\b': - if (column > 0) - scm_port_position_set_column (position, scm_from_int (column - 1)); - break; case '\n': { long line = scm_to_long (scm_port_position_line (position)); @@ -1718,15 +1714,23 @@ update_port_position (SCM position, scm_t_wchar c) scm_port_position_set_column (position, SCM_INUM0); } break; - case '\r': - scm_port_position_set_column (position, SCM_INUM0); - break; case '\t': scm_port_position_set_column (position, scm_from_int (column + 8 - column % 8)); break; default: - scm_port_position_set_column (position, scm_from_int (column + 1)); + { + int width = 1; + if (c >= 128) + { + width = wcwidth (c); + if (width == -1) + { + width = 1; + } + } + scm_port_position_set_column (position, scm_from_int (column + width)); + } break; } } diff --git a/module/ice-9/suspendable-ports.scm b/module/ice-9/suspendable-ports.scm index a823f1d37..e964d65ae 100644 --- a/module/ice-9/suspendable-ports.scm +++ b/module/ice-9/suspendable-ports.scm @@ -559,29 +559,21 @@ (lambda (buf bv cur buffered) (slow-path)))) (define-inlinable (advance-port-position! pos char) - ;; FIXME: this cond is a speed hack; really we should just compile - ;; `case' better. - (cond - ;; FIXME: char>? et al should compile well. - ((<= (char->integer #\space) (char->integer char)) - (set-port-position-column! pos (1+ (port-position-column pos)))) - (else - (case char - ((#\alarm) #t) ; No change. - ((#\backspace) - (let ((col (port-position-column pos))) - (when (> col 0) - (set-port-position-column! pos (1- col))))) - ((#\newline) - (set-port-position-line! pos (1+ (port-position-line pos))) - (set-port-position-column! pos 0)) - ((#\return) - (set-port-position-column! pos 0)) - ((#\tab) - (let ((col (port-position-column pos))) - (set-port-position-column! pos (- (+ col 8) (remainder col 8))))) - (else - (set-port-position-column! pos (1+ (port-position-column pos)))))))) + (if (>= (char->integer char) 128) + (let ((width (wcwidth char))) + (set-port-position-column! + pos + (+ (port-position-column pos) + (if (= width -1) 1 width)))) + (case char + ((#\newline) + (set-port-position-line! pos (1+ (port-position-line pos))) + (set-port-position-column! pos 0)) + ((#\tab) + (let ((col (port-position-column pos))) + (set-port-position-column! pos (- (+ col 8) (remainder col 8))))) + (else + (set-port-position-column! pos (1+ (port-position-column pos))))))) (define* (read-char #:optional (port (current-input-port))) (define (finish buf char) diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index cd87640ab..5b968887e 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -1195,22 +1195,22 @@ (pass-if "\\a" (let ((port (open-output-string))) (display "\a" port) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\a" (let ((port (open-output-string))) (display "x\a" port) - (= 1 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\x08 backspace" (let ((port (open-output-string))) (display "\x08" port) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\x08 backspace" (let ((port (open-output-string))) (display "x\x08" port) - (= 0 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\n" (let ((port (open-output-string))) @@ -1225,12 +1225,12 @@ (pass-if "\\r" (let ((port (open-output-string))) (display "\r" port) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\r" (let ((port (open-output-string))) (display "x\r" port) - (= 0 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\t" (let ((port (open-output-string))) @@ -1240,7 +1240,31 @@ (pass-if "x\\t" (let ((port (open-output-string))) (display "x\t" port) - (= 8 (port-column port))))) + (= 8 (port-column port)))) + + (pass-if "漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-output-string))) + (display "漢字" port) + (= 2 (port-column port)))) + + (pass-if "x漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-output-string))) + (display "x漢字" port) + (= 3 (port-column port)))) + + (pass-if "漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-output-string))) + (display "漢字" port) + (= 4 (port-column port)))) + + (pass-if "x漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-output-string))) + (display "x漢字" port) + (= 5 (port-column port))))) (with-test-prefix "input" @@ -1252,22 +1276,22 @@ (pass-if "\\a" (let ((port (open-input-string "\a"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\a" (let ((port (open-input-string "x\a"))) (while (not (eof-object? (read-char port)))) - (= 1 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\x08 backspace" (let ((port (open-input-string "\x08"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\x08 backspace" (let ((port (open-input-string "x\x08"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\n" (let ((port (open-input-string "\n"))) @@ -1282,12 +1306,12 @@ (pass-if "\\r" (let ((port (open-input-string "\r"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 1 (port-column port)))) (pass-if "x\\r" (let ((port (open-input-string "x\r"))) (while (not (eof-object? (read-char port)))) - (= 0 (port-column port)))) + (= 2 (port-column port)))) (pass-if "\\t" (let ((port (open-input-string "\t"))) @@ -1297,7 +1321,36 @@ (pass-if "x\\t" (let ((port (open-input-string "x\t"))) (while (not (eof-object? (read-char port)))) - (= 8 (port-column port)))))) + (= 8 (port-column port)))) + + (pass-if "漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-input-string "漢字"))) + (while (not (eof-object? (read-char port)))) + ;; There are 2 characters, but wcwidth failed so it + ;; returned 2. + (= 2 (port-column port)))) + + (pass-if "x漢字 in C locale" + (setlocale LC_CTYPE "C") + (let ((port (open-input-string "x漢字"))) + (while (not (eof-object? (read-char port)))) + (= 3 (port-column port)))) + + (pass-if "漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-input-string "漢字"))) + (while (not (eof-object? (read-char port)))) + (setlocale LC_CTYPE "C") + ;; there are only 2 characters, but 4 columns + (= 4 (port-column port)))) + + (pass-if "x漢字 in UTF-8 locale" + (setlocale LC_CTYPE "en_US.UTF-8") + (let ((port (open-input-string "x漢字"))) + (while (not (eof-object? (read-char port)))) + (setlocale LC_CTYPE "C") + (= 5 (port-column port)))))) (with-test-prefix "port-line" -- 2.32.0