From 695c2a6189458a292819df8fba659ea488dc0b4e Mon Sep 17 00:00:00 2001
From: Vijay Marupudi
Date: Thu, 20 Jan 2022 22:19:25 -0500
Subject: [PATCH] Enable utf8->string to take a range

Additionally, adds a scm_utf8_to_string_range function for access from C.
---
 doc/ref/api-data.texi  |  3 ++-
 libguile/bytevectors.c | 53 ++++++++++++++++++++++++++++++++++--------
 libguile/bytevectors.h |  1 +
 3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index b6c2c4d61..1bdd1f7ed 100644
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -7139,10 +7139,11 @@ UTF-32 (aka. UCS-4) encoding of @var{str}. For UTF-16 and UTF-32,
 it defaults to big endian.
 @end deffn
 
-@deffn {Scheme Procedure} utf8->string utf
+@deffn {Scheme Procedure} utf8->string utf [start [end]]
 @deffnx {Scheme Procedure} utf16->string utf [endianness]
 @deffnx {Scheme Procedure} utf32->string utf [endianness]
 @deffnx {C Function} scm_utf8_to_string (utf)
+@deffnx {C Function} scm_utf8_to_string_range (utf, start, end)
 @deffnx {C Function} scm_utf16_to_string (utf, endianness)
 @deffnx {C Function} scm_utf32_to_string (utf, endianness)
 Return a newly allocated string that contains from the UTF-8-, UTF-16-,
diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c
index f42fbb427..44a062257 100644
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -2094,27 +2094,60 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
   return (str);
 
 
-SCM_DEFINE (scm_utf8_to_string, "utf8->string",
-	    1, 0, 0,
-	    (SCM utf),
-	    "Return a newly allocate string that contains from the UTF-8-"
-	    "encoded contents of bytevector @var{utf}.")
-#define FUNC_NAME s_scm_utf8_to_string
+SCM_DEFINE (scm_utf8_to_string_range, "utf8->string",
+	    1, 2, 0,
+	    (SCM utf, SCM start, SCM end),
+	    "Return a newly allocated string decoded from the UTF-8-encoded\n"
+	    "contents of bytevector @var{utf}.  The optional byte offsets\n"
+	    "@var{start} (inclusive, default 0) and @var{end} (exclusive,\n"
+	    "default the length of @var{utf}) select the part of @var{utf}\n"
+	    "to decode.")
+#define FUNC_NAME s_scm_utf8_to_string_range
 {
   SCM str;
   const char *c_utf;
-  size_t c_utf_len = 0;
+  size_t c_len;
+  size_t c_start = 0;
+  size_t c_end;
 
   SCM_VALIDATE_BYTEVECTOR (1, utf);
-
-  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
   c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
-  str = scm_from_utf8_stringn (c_utf, c_utf_len);
+  c_len = SCM_BYTEVECTOR_LENGTH (utf);
+  c_end = c_len;
+
+  /* START may equal the length: that selects the empty tail, and it
+     keeps an explicit START of 0 valid for an empty bytevector.  */
+  if (!scm_is_eq (start, SCM_UNDEFINED))
+    {
+      c_start = scm_to_size_t (start);
+      if (SCM_UNLIKELY (c_start > c_len))
+        scm_out_of_range (FUNC_NAME, start);
+    }
+
+  /* Checked independently of START so that a C caller passing
+     SCM_UNDEFINED for START alone still has END honored.  */
+  if (!scm_is_eq (end, SCM_UNDEFINED))
+    {
+      c_end = scm_to_size_t (end);
+      if (SCM_UNLIKELY (c_end < c_start || c_end > c_len))
+        scm_out_of_range (FUNC_NAME, end);
+    }
+
+  str = scm_from_utf8_stringn (c_utf + c_start, c_end - c_start);
 
   return (str);
 }
 #undef FUNC_NAME
 
+/* Decode all of UTF.  Kept as a plain C function so that existing
+   callers of the one-argument entry point keep working.  */
+SCM
+scm_utf8_to_string (SCM utf)
+{
+  return scm_utf8_to_string_range (utf, SCM_UNDEFINED, SCM_UNDEFINED);
+}
+
+
 SCM_DEFINE (scm_utf16_to_string, "utf16->string",
 	    1, 1, 0,
 	    (SCM utf, SCM endianness),
diff --git a/libguile/bytevectors.h b/libguile/bytevectors.h
index 980d6e267..82a66ee5e 100644
--- a/libguile/bytevectors.h
+++ b/libguile/bytevectors.h
@@ -113,6 +113,7 @@ SCM_API SCM scm_string_to_utf8 (SCM);
 SCM_API SCM scm_string_to_utf16 (SCM, SCM);
 SCM_API SCM scm_string_to_utf32 (SCM, SCM);
 SCM_API SCM scm_utf8_to_string (SCM);
+SCM_API SCM scm_utf8_to_string_range (SCM, SCM, SCM);
 SCM_API SCM scm_utf16_to_string (SCM, SCM);
 SCM_API SCM scm_utf32_to_string (SCM, SCM);
 
-- 
2.34.1