From 619e3d3afec2c116007d9cb2ad32a500fb32a7dd Mon Sep 17 00:00:00 2001 From: Rob Browning Date: Sun, 30 Jun 2024 22:41:40 -0500 Subject: [PATCH 1/2] scm_i_utf8_string_hash: don't overrun when len is zero When the length is zero, the previous code would include the byte after the end of the string in the hash. Fix that (the wide and narrow hashers also guard against it via "case 0"), and while we're here, switch to u8_mbtouc since u8_mbtouc_unsafe now behaves the same (see the libunistring info pages), and don't bother mutating length for the trailing elements, since we don't need to. libguile/hash.c (scm_i_utf8_string_hash): switch to u8_mbtouc, and avoid overrun when len == 0. --- libguile/hash.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/libguile/hash.c b/libguile/hash.c index a038a11bf..d92f60df8 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -195,32 +195,34 @@ scm_i_utf8_string_hash (const char *str, size_t len) /* Handle most of the key. */ while (length > 3) { - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); + ustr += u8_mbtouc (&u32, ustr, end - ustr); a += u32; - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); + ustr += u8_mbtouc (&u32, ustr, end - ustr); b += u32; - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); + ustr += u8_mbtouc (&u32, ustr, end - ustr); c += u32; mix (a, b, c); length -= 3; } /* Handle the last 3 elements's. 
*/ - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); - a += u32; - if (--length) + if (length) { - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); - b += u32; - if (--length) + ustr += u8_mbtouc (&u32, ustr, end - ustr); + a += u32; + if (length > 1) { - ustr += u8_mbtouc_unsafe (&u32, ustr, end - ustr); - c += u32; + ustr += u8_mbtouc (&u32, ustr, end - ustr); + b += u32; + if (length > 2) + { + ustr += u8_mbtouc (&u32, ustr, end - ustr); + c += u32; + } } + final (a, b, c); } - final (a, b, c); - if (sizeof (unsigned long) == 8) ret = (((unsigned long) c) << 32) | b; else -- 2.43.0