From: Andreas Ettner Date: Mon, 9 May 2022 10:05:26 +0200 Subject: [PATCH] Improve internationalization --- libguile/i18n.c | 257 ++++++++++++++++++++++++++++++++----- test-suite/tests/i18n.test | 73 +++++++++-- 2 files changed, 288 insertions(+), 42 deletions(-) diff --git a/libguile/i18n.c b/libguile/i18n.c index 52a8080..609eb0b 100644 --- a/libguile/i18n.c +++ b/libguile/i18n.c @@ -26,20 +26,27 @@ #include /* `strcoll ()' */ #include /* `toupper ()' et al. */ #include +#include +#include +#include #include #include +#include #include "boolean.h" #include "chars.h" #include "dynwind.h" +#include "error.h" #include "extensions.h" #include "feature.h" +#include "gc.h" #include "gsubr.h" #include "list.h" #include "modules.h" #include "numbers.h" #include "pairs.h" #include "posix.h" /* for `scm_i_locale_mutex' */ +#include "scm.h" #include "smob.h" #include "strings.h" #include "symbols.h" @@ -1364,6 +1371,84 @@ SCM_DEFINE (scm_string_locale_titlecase, "string-locale-titlecase", 1, 1, 0, /* Locale-dependent number parsing. */ +/* Yield 1 if the value of the expression A is less or equal to the + value of expression B in the mathematical sense, otherwise yield + 0. The argument expressions may be evaluated multiple times. + + Mind you: `1U <= -1' evaluates to 1 in C. */ +#define SCM_LEQ(a, b) \ + (((a) < 0) ? \ + (((b) < 0) ? \ + ((a) <= (b)) : \ + 1) : \ + (((b) < 0) ? \ + 0 : \ + ((a) <= (b)))) + +/* Copy SIZE elements of array FROM into array TO replacing all not + representable characters with null. Null characters are passed + through. The arrays TO and FROM must both have a length of at + least SIZE and must not overlap. */ +static void +scm_t_wchar_to_wchar_t_array (wchar_t to[], + const scm_t_wchar from[], + size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + { +#if (SCM_LEQ (WCHAR_MIN, INT32_MIN) && SCM_LEQ (INT32_MAX, WCHAR_MAX)) + /* Optimization: Since `scm_t_wchar' is a signed, 32-bit integer + type (according to section [API Reference | Data Types | + Characters] in the Guile Reference), FROM[I] is in this + case clearly presentable in `wchar_t'. */ + to[i] = (wchar_t) from[i]; +#else + scm_t_wchar ch = from[i]; + to[i] = ((SCM_LEQ (WCHAR_MIN, ch) && SCM_LEQ (ch, WCHAR_MAX)) ? + ((wchar_t) ch) : + L'\0'); +#endif + } +} + +#define SCM_NARROW_STRING_TO_C(str, c_str, c_str_malloc_p) \ + do \ + { \ + size_t len, bytes; \ + \ + len = scm_i_string_length (str); \ + if (!(len < SIZE_MAX)) \ + scm_num_overflow ("SCM_NARROW_STRING_TO_C"); \ + \ + bytes = len + ((size_t) 1); \ + c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \ + c_str = (c_str_malloc_p ? scm_malloc (bytes) : alloca (bytes)); \ + \ + memcpy (c_str, scm_i_string_chars (str), len); \ + c_str[len] = '\0'; \ + } while (0) + +#define SCM_WIDE_STRING_TO_C(str, c_str, c_str_malloc_p) \ + do \ + { \ + size_t len, bytes; \ + \ + len = scm_i_string_length (str); \ + if (!(len < (SIZE_MAX / sizeof (wchar_t)))) \ + scm_num_overflow ("SCM_WIDE_STRING_TO_C"); \ + \ + bytes = (len + ((size_t) 1)) * sizeof (wchar_t); \ + c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \ + c_str = (c_str_malloc_p ? scm_malloc (bytes) : alloca (bytes)); \ + \ + scm_t_wchar_to_wchar_t_array (c_str, \ + scm_i_string_wide_chars (str), \ + len); \ + c_str[len] = L'\0'; \ + } while (0) + SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer", 1, 2, 0, (SCM str, SCM base, SCM locale), "Convert string @var{str} into an integer according to either " @@ -1374,42 +1459,99 @@ SCM_DEFINE (scm_locale_string_to_integer, "locale-string->integer", #define FUNC_NAME s_scm_locale_string_to_integer { SCM result; - long c_result; + SCM char_count; int c_base; - const char *c_str; - char *c_endptr; scm_t_locale c_locale; SCM_VALIDATE_STRING (1, str); - c_str = scm_i_string_chars (str); if (!scm_is_eq (base, SCM_UNDEFINED)) - SCM_VALIDATE_INT_COPY (2, base, c_base); + { + SCM_VALIDATE_INT_COPY (2, base, c_base); + if (!(c_base == 0 || (2 <= c_base && c_base <= 36))) + scm_out_of_range (FUNC_NAME, base); + } else c_base = 10; SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale); - if (c_locale != NULL) + if (scm_i_is_narrow_string (str)) { + long c_result; + char *c_str; + int c_str_malloc_p; + char *c_endptr; + + scm_dynwind_begin (0); + { + SCM_NARROW_STRING_TO_C (str, c_str, c_str_malloc_p); + if (c_str_malloc_p) + scm_dynwind_free (c_str); + + if (c_locale != NULL) + { #if defined USE_GNU_LOCALE_API && defined HAVE_STRTOL_L - c_result = strtol_l (c_str, &c_endptr, c_base, c_locale); + c_result = strtol_l (c_str, &c_endptr, c_base, c_locale); #else - RUN_IN_LOCALE_SECTION (c_locale, - c_result = strtol (c_str, &c_endptr, c_base)); + RUN_IN_LOCALE_SECTION (c_locale, + c_result = strtol (c_str, &c_endptr, c_base)); #endif + } + else + c_result = strtol (c_str, &c_endptr, c_base); + + if (c_endptr == c_str) + { + result = SCM_BOOL_F; + char_count = scm_from_int (0); + } + else + { + result = scm_from_long (c_result); + char_count = scm_from_ptrdiff_t (c_endptr - c_str); + } + } + scm_dynwind_end (); } else - c_result = strtol (c_str, &c_endptr, c_base); + { + long c_result; + wchar_t *c_str; + int c_str_malloc_p; + wchar_t *c_endptr; - scm_remember_upto_here (str); + scm_dynwind_begin (0); + { + SCM_WIDE_STRING_TO_C (str, c_str, c_str_malloc_p); + if (c_str_malloc_p) + scm_dynwind_free (c_str); + + if (c_locale != NULL) + { + RUN_IN_LOCALE_SECTION (c_locale, + c_result = wcstol (c_str, &c_endptr, c_base)); + } + else + c_result = wcstol (c_str, &c_endptr, c_base); + + if (c_endptr == c_str) + { + result = SCM_BOOL_F; + char_count = scm_from_int (0); + } + else + { + result = scm_from_long (c_result); + char_count = scm_from_ptrdiff_t (c_endptr - c_str); + } + } + scm_dynwind_end (); + } - if (c_endptr == c_str) - result = SCM_BOOL_F; - else - result = scm_from_long (c_result); + scm_remember_upto_here_2 (str, locale); - return scm_values_2 (result, scm_from_long (c_endptr - c_str)); + return scm_values_2 (result, char_count); } #undef FUNC_NAME @@ -1424,36 +1566,89 @@ SCM_DEFINE (scm_locale_string_to_inexact, "locale-string->inexact", #define FUNC_NAME s_scm_locale_string_to_inexact { SCM result; - double c_result; - const char *c_str; - char *c_endptr; + SCM char_count; scm_t_locale c_locale; SCM_VALIDATE_STRING (1, str); - c_str = scm_i_string_chars (str); SCM_VALIDATE_OPTIONAL_LOCALE_COPY (2, locale, c_locale); - if (c_locale != NULL) + if (scm_i_is_narrow_string (str)) { + double c_result; + char *c_str; + int c_str_malloc_p; + char *c_endptr; + + scm_dynwind_begin (0); + { + SCM_NARROW_STRING_TO_C (str, c_str, c_str_malloc_p); + if (c_str_malloc_p) + scm_dynwind_free (c_str); + + if (c_locale != NULL) + { #if defined USE_GNU_LOCALE_API && defined HAVE_STRTOD_L - c_result = strtod_l (c_str, &c_endptr, c_locale); + c_result = strtod_l (c_str, &c_endptr, c_locale); #else - RUN_IN_LOCALE_SECTION (c_locale, - c_result = strtod (c_str, &c_endptr)); + RUN_IN_LOCALE_SECTION (c_locale, + c_result = strtod (c_str, &c_endptr)); #endif + } + else + c_result = strtod (c_str, &c_endptr); + + if (c_endptr == c_str) + { + result = SCM_BOOL_F; + char_count = scm_from_int (0); + } + else + { + result = scm_from_double (c_result); + char_count = scm_from_ptrdiff_t (c_endptr - c_str); + } + } + scm_dynwind_end (); } else - c_result = strtod (c_str, &c_endptr); + { + double c_result; + wchar_t *c_str; + int c_str_malloc_p; + wchar_t *c_endptr; - scm_remember_upto_here (str); + scm_dynwind_begin (0); + { + SCM_WIDE_STRING_TO_C (str, c_str, c_str_malloc_p); + if (c_str_malloc_p) + scm_dynwind_free (c_str); + + if (c_locale != NULL) + { + RUN_IN_LOCALE_SECTION (c_locale, + c_result = wcstod (c_str, &c_endptr)); + } + else + c_result = wcstod (c_str, &c_endptr); + + if (c_endptr == c_str) + { + result = SCM_BOOL_F; + char_count = scm_from_int (0); + } + else + { + result = scm_from_double (c_result); + char_count = scm_from_ptrdiff_t (c_endptr - c_str); + } + } + scm_dynwind_end (); + } - if (c_endptr == c_str) - result = SCM_BOOL_F; - else - result = scm_from_double (c_result); + scm_remember_upto_here_2 (str, locale); - return scm_values_2 (result, scm_from_long (c_endptr - c_str)); + return scm_values_2 (result, char_count); } #undef FUNC_NAME diff --git a/test-suite/tests/i18n.test b/test-suite/tests/i18n.test index 83b53d0..ec295c1 100644 --- a/test-suite/tests/i18n.test +++ b/test-suite/tests/i18n.test @@ -412,19 +412,70 @@ (with-test-prefix "number parsing" (pass-if "locale-string->integer" - (call-with-values (lambda () (locale-string->integer "123")) - (lambda (result char-count) - (and (equal? result 123) - (equal? char-count 3))))) + (and (call-with-values + (lambda () (locale-string->integer "123")) + (lambda (result char-count) + (and (equal? result 123) + (equal? char-count 3)))) + (call-with-values + (lambda () (locale-string->integer (substring "12" 0 1) + 10 + (make-locale LC_ALL "C"))) + (lambda (result char-count) + (and (equal? result 1) + (equal? char-count 1)))) + (call-with-values + (lambda () (locale-string->integer (substring "1\u0100" 0 1) + 10 + (make-locale LC_ALL "C"))) + (lambda (result char-count) + (and (equal? result 1) + (equal? char-count 1)))))) + + (pass-if "locale-string->integer (American English)" + (under-american-english-locale-or-unresolved + (lambda () + (call-with-values + (lambda () (locale-string->integer (substring "\u20021" 0 2) + 10 + %american-english-locale)) + (lambda (result char-count) + (and (equal? result 1) + (equal? char-count 2))))))) (pass-if "locale-string->inexact" - (call-with-values - (lambda () - (locale-string->inexact "123.456" - (make-locale (list LC_NUMERIC) "C"))) - (lambda (result char-count) - (and (equal? result 123.456) - (equal? char-count 7))))) + (and (call-with-values + (lambda () + (locale-string->inexact "123.456" + (make-locale (list LC_NUMERIC) "C"))) + (lambda (result char-count) + (and (equal? result 123.456) + (equal? char-count 7)))) + (call-with-values + (lambda () + (locale-string->inexact (substring "0.5625" 0 3) + (make-locale LC_ALL "C"))) + (lambda (result char-count) + (and (equal? result 0.5) + (equal? char-count 3)))) + (call-with-values + (lambda () + (locale-string->inexact (substring "1.25\u0100" 0 4) + (make-locale LC_ALL "C"))) + (lambda (result char-count) + (and (equal? result 1.25) + (equal? char-count 4)))))) + + (pass-if "locale-string->inexact (American English)" + (under-american-english-locale-or-unresolved + (lambda () + (call-with-values + (lambda () + (locale-string->inexact (substring "\u20021.25" 0 5) + %american-english-locale)) + (lambda (result char-count) + (and (equal? result 1.25) + (equal? char-count 5))))))) (pass-if "locale-string->inexact (French)" (under-french-locale-or-unresolved -- 2.35.1