From: Rob Browning <rlb@defaultvalue.org>
To: 56413@debbugs.gnu.org
Cc: "Ludovic Courtès" <ludo@gnu.org>
Subject: bug#56413: [PATCH v2 1/1] scm_i_utf8_string_hash: compute u8 chars not bytes
Date: Sun, 5 Mar 2023 16:21:00 -0600 [thread overview]
Message-ID: <20230305222100.1062507-1-rlb@defaultvalue.org> (raw)
In-Reply-To: <87tu39rc4i.fsf@gnu.org>
Noticed while investigating a migration to utf-8 strings. After making
changes that routed non-ascii symbol hashing through this function,
encoding-iso88597.test began intermittently failing because it would
traverse trailing garbage when u8_strnlen reported 8 chars instead of 4.
Change the scm_i_str2symbol and scm_i_str2uninterned_symbol internal
hash type to unsigned long to explicitly match the scm_i_string_hash
result type.
* libguile/hash.c (scm_i_utf8_string_hash): Call u8_mbsnlen not u8_strnlen.
* libguile/symbols.c (scm_i_str2symbol, scm_i_str2uninterned_symbol):
Use unsigned long for scm_i_string_hash result.
* test-suite/standalone/.gitignore: Add test-hashing.
* test-suite/standalone/Makefile.am: Add test-hashing.
* test-suite/standalone/test-hashing.c: Add.
---
Fixed up a few things: the test, gitignore, and added the changelog
entries to the message.
libguile/hash.c | 2 +-
libguile/symbols.c | 4 +-
test-suite/standalone/.gitignore | 1 +
test-suite/standalone/Makefile.am | 7 ++++
test-suite/standalone/test-hashing.c | 63 ++++++++++++++++++++++++++++
5 files changed, 74 insertions(+), 3 deletions(-)
create mode 100644 test-suite/standalone/test-hashing.c
diff --git a/libguile/hash.c b/libguile/hash.c
index c192ac2e5..5abdfe397 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -185,7 +185,7 @@ scm_i_utf8_string_hash (const char *str, size_t len)
/* Invalid UTF-8; punt. */
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
- length = u8_strnlen (ustr, len);
+ length = u8_mbsnlen (ustr, len);
/* Set up the internal state. */
a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47;
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 02be7c1c4..086abf585 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -239,7 +239,7 @@ static SCM
scm_i_str2symbol (SCM str)
{
SCM symbol;
- size_t raw_hash = scm_i_string_hash (str);
+ unsigned long raw_hash = scm_i_string_hash (str);
symbol = lookup_interned_symbol (str, raw_hash);
if (scm_is_true (symbol))
@@ -261,7 +261,7 @@ scm_i_str2symbol (SCM str)
static SCM
scm_i_str2uninterned_symbol (SCM str)
{
- size_t raw_hash = scm_i_string_hash (str);
+ unsigned long raw_hash = scm_i_string_hash (str);
return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash);
}
diff --git a/test-suite/standalone/.gitignore b/test-suite/standalone/.gitignore
index 794146e60..f38f7fbe2 100644
--- a/test-suite/standalone/.gitignore
+++ b/test-suite/standalone/.gitignore
@@ -1,5 +1,6 @@
/test-conversion
/test-gh
+/test-hashing
/test-list
/test-num2integral
/test-round
diff --git a/test-suite/standalone/Makefile.am b/test-suite/standalone/Makefile.am
index 547241afa..17bb59a18 100644
--- a/test-suite/standalone/Makefile.am
+++ b/test-suite/standalone/Makefile.am
@@ -167,6 +167,13 @@ test_conversion_LDADD = $(LIBGUILE_LDADD) $(top_builddir)/lib/libgnu.la
check_PROGRAMS += test-conversion
TESTS += test-conversion
+# test-hashing
+test_hashing_SOURCES = test-hashing.c
+test_hashing_CFLAGS = ${test_cflags}
+test_hashing_LDADD = $(LIBGUILE_LDADD) $(top_builddir)/lib/libgnu.la
+check_PROGRAMS += test-hashing
+TESTS += test-hashing
+
# test-loose-ends
test_loose_ends_SOURCES = test-loose-ends.c
test_loose_ends_CFLAGS = ${test_cflags}
diff --git a/test-suite/standalone/test-hashing.c b/test-suite/standalone/test-hashing.c
new file mode 100644
index 000000000..5982a0fdb
--- /dev/null
+++ b/test-suite/standalone/test-hashing.c
@@ -0,0 +1,63 @@
+/* Copyright 2022-2023
+ Free Software Foundation, Inc.
+
+ This file is part of Guile.
+
+ Guile is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Guile is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with Guile. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <libguile.h>
+
+#include <stdio.h>
+
+static void
+test_hashing ()
+{
+ // Make sure a utf-8 symbol has the expected hash. In addition to
+ // catching algorithmic regressions, this would have caught a
+ // long-standing buffer overflow.
+
+ // Περί
+ char about_u8[] = {0xce, 0xa0, 0xce, 0xb5, 0xcf, 0x81, 0xce, 0xaf, 0};
+ SCM sym = scm_from_utf8_symbol (about_u8);
+
+ // Value determined by calling wide_string_hash on {0x3A0, 0x3B5,
+ // 0x3C1, 0x3AF} via a temporary test program.
+ const unsigned long expect = 4029223418961680680;
+ const unsigned long actual = scm_to_ulong (scm_symbol_hash (sym));
+
+ if (actual != expect)
+ {
+ fprintf (stderr, "fail: unexpected utf-8 symbol hash (%lu != %lu)\n",
+ actual, expect);
+ exit (EXIT_FAILURE);
+ }
+}
+
+static void
+tests (void *data, int argc, char **argv)
+{
+ test_hashing ();
+}
+
+int
+main (int argc, char *argv[])
+{
+ scm_boot_guile (argc, argv, tests, NULL);
+ return 0;
+}
--
2.39.2
next prev parent reply other threads:[~2023-03-05 22:21 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-06 1:23 bug#56413: [PATCH 1/1] scm_i_utf8_string_hash: compute u8 chars not bytes Rob Browning
2022-07-06 3:04 ` Rob Browning
2022-11-05 22:18 ` Ludovic Courtès
2022-11-06 16:44 ` Rob Browning
2022-11-06 17:45 ` Rob Browning
2022-11-07 13:06 ` Ludovic Courtès
2022-11-06 19:46 ` Rob Browning
2022-11-07 13:07 ` Ludovic Courtès
2022-11-08 5:05 ` Rob Browning
2022-11-08 10:09 ` Ludovic Courtès
2023-03-05 22:21 ` Rob Browning [this message]
2023-03-06 16:39 ` bug#56413: [PATCH v2 " Ludovic Courtès
2023-03-12 19:30 ` bug#56413: [PATCH v3 " Rob Browning
2023-03-13 11:29 ` Ludovic Courtès
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/guile/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230305222100.1062507-1-rlb@defaultvalue.org \
--to=rlb@defaultvalue.org \
--cc=56413@debbugs.gnu.org \
--cc=ludo@gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).