From 639d9a53c883ec310783ba8ed9b43a485c0e5b61 Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 24 Oct 2023 23:47:41 +0200 Subject: [PATCH 5/5] Store hashes as uintptr_t As for scm_t_inum, Guile expects that hashes have the same size as pointers to get compatible bytecode (with respect to interned symbols) on different platforms. This assumption breaks on 64-bit Windows where longs are still 32 bit. Instead use uintptr_t as the datatype. Based on changes by Jan Nieuwenhuizen, Mike Gran, and Andy Wingo. * libguile/hash.c: * libguile/hash.h: * libguile/strings.c: * libguile/strings.h: * libguile/symbols.c: * libguile/symbols.h: Use uintptr_t to store hashes. --- libguile/hash.c | 68 +++++++++++++++++++++++----------------------- libguile/hash.h | 22 +++++++-------- libguile/strings.c | 2 +- libguile/strings.h | 2 +- libguile/symbols.c | 24 ++++++++-------- libguile/symbols.h | 6 ++-- 6 files changed, 62 insertions(+), 62 deletions(-) diff --git a/libguile/hash.c b/libguile/hash.c index 5abdfe397..cfe14bf1d 100644 --- a/libguile/hash.c +++ b/libguile/hash.c @@ -112,31 +112,31 @@ extern double floor(); the hash on a 64-bit system are equal to the hash on a 32-bit \ system. The low 32 bits just add more entropy. */ \ if (sizeof (ret) == 8) \ - ret = (((unsigned long) c) << 32) | b; \ + ret = (((uintptr_t) c) << 32) | b; \ else \ ret = c; \ } while (0) -static unsigned long +static uintptr_t narrow_string_hash (const uint8_t *str, size_t len) { - unsigned long ret; + uintptr_t ret; JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); ret >>= 2; /* Ensure that it fits in a fixnum. */ return ret; } -static unsigned long +static uintptr_t wide_string_hash (const scm_t_wchar *str, size_t len) { - unsigned long ret; + uintptr_t ret; JENKINS_LOOKUP3_HASHWORD2 (str, len, ret); ret >>= 2; /* Ensure that it fits in a fixnum. */ return ret; } -unsigned long +uintptr_t scm_i_string_hash (SCM str) { size_t len = scm_i_string_length (str); @@ -148,13 +148,13 @@ scm_i_string_hash (SCM str) return wide_string_hash (scm_i_string_wide_chars (str), len); } -unsigned long +uintptr_t scm_i_locale_string_hash (const char *str, size_t len) { return scm_i_string_hash (scm_from_locale_stringn (str, len)); } -unsigned long +uintptr_t scm_i_latin1_string_hash (const char *str, size_t len) { if (len == (size_t) -1) @@ -164,11 +164,11 @@ scm_i_latin1_string_hash (const char *str, size_t len) } /* A tricky optimization, but probably worth it. */ -unsigned long +uintptr_t scm_i_utf8_string_hash (const char *str, size_t len) { const uint8_t *end, *ustr = (const uint8_t *) str; - unsigned long ret; + uintptr_t ret; /* The length of the string in characters. This name corresponds to Jenkins' original name. */ @@ -219,8 +219,8 @@ scm_i_utf8_string_hash (const char *str, size_t len) final (a, b, c); - if (sizeof (unsigned long) == 8) - ret = (((unsigned long) c) << 32) | b; + if (sizeof (uintptr_t) == 8) + ret = (((uintptr_t) c) << 32) | b; else ret = c; @@ -228,16 +228,16 @@ scm_i_utf8_string_hash (const char *str, size_t len) return ret; } -static unsigned long scm_raw_ihashq (scm_t_bits key); -static unsigned long scm_raw_ihash (SCM obj, size_t depth); +static uintptr_t scm_raw_ihashq (scm_t_bits key); +static uintptr_t scm_raw_ihash (SCM obj, size_t depth); /* Return the hash of struct OBJ. Traverse OBJ's fields to compute the result, unless DEPTH is zero. Assumes that OBJ is a struct. */ -static unsigned long +static uintptr_t scm_i_struct_hash (SCM obj, size_t depth) { size_t struct_size, field_num; - unsigned long hash; + uintptr_t hash; struct_size = SCM_STRUCT_SIZE (obj); @@ -257,7 +257,7 @@ scm_i_struct_hash (SCM obj, size_t depth) /* Thomas Wang's integer hasher, from http://www.cris.com/~Ttwang/tech/inthash.htm. */ -static unsigned long +static uintptr_t scm_raw_ihashq (scm_t_bits key) { if (sizeof (key) < 8) @@ -283,7 +283,7 @@ scm_raw_ihashq (scm_t_bits key) } /* `depth' is used to limit recursion. */ -static unsigned long +static uintptr_t scm_raw_ihash (SCM obj, size_t depth) { if (SCM_IMP (obj)) @@ -301,7 +301,7 @@ scm_raw_ihash (SCM obj, size_t depth) SCM n = SCM_I_MAKINUM (SCM_MOST_POSITIVE_FIXNUM); if (scm_is_inexact (obj)) obj = scm_inexact_to_exact (obj); - return scm_raw_ihashq (scm_to_ulong (scm_modulo (obj, n))); + return scm_raw_ihashq (scm_to_uintptr_t (scm_modulo (obj, n))); } else return scm_i_string_hash (scm_number_to_string (obj, scm_from_int (10))); @@ -318,7 +318,7 @@ scm_raw_ihash (SCM obj, size_t depth) { size_t len = SCM_SIMPLE_VECTOR_LENGTH (obj); size_t i = depth / 2; - unsigned long h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); + uintptr_t h = scm_raw_ihashq (SCM_CELL_WORD_0 (obj)); if (len) while (i--) h ^= scm_raw_ihash (scm_c_vector_ref (obj, h % len), i); @@ -326,7 +326,7 @@ scm_raw_ihash (SCM obj, size_t depth) } case scm_tc7_syntax: { - unsigned long h; + uintptr_t h; h = scm_raw_ihash (scm_syntax_expression (obj), depth); h ^= scm_raw_ihash (scm_syntax_wrap (obj), depth); h ^= scm_raw_ihash (scm_syntax_module (obj), depth); @@ -365,8 +365,8 @@ scm_raw_ihash (SCM obj, size_t depth) -unsigned long -scm_ihashq (SCM obj, unsigned long n) +uintptr_t +scm_ihashq (SCM obj, uintptr_t n) { return scm_raw_ihashq (SCM_UNPACK (obj)) % n; } @@ -386,8 +386,8 @@ SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, "different values, since @code{foo} will be garbage collected.") #define FUNC_NAME s_scm_hashq { - unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); - return scm_from_ulong (scm_ihashq (key, sz)); + uintptr_t sz = scm_to_unsigned_integer (size, 1, UINTPTR_MAX); + return scm_from_unsigned_integer (scm_ihashq (key, sz)); } #undef FUNC_NAME @@ -395,8 +395,8 @@ SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0, -unsigned long -scm_ihashv (SCM obj, unsigned long n) +uintptr_t +scm_ihashv (SCM obj, uintptr_t n) { if (SCM_NUMP(obj)) return scm_raw_ihash (obj, 10) % n; @@ -419,8 +419,8 @@ SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, "different values, since @code{foo} will be garbage collected.") #define FUNC_NAME s_scm_hashv { - unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); - return scm_from_ulong (scm_ihashv (key, sz)); + uintptr_t sz = scm_to_unsigned_integer (size, 1, UINTPTR_MAX); + return scm_from_unsigned_integer (scm_ihashv (key, sz)); } #undef FUNC_NAME @@ -428,10 +428,10 @@ SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0, -unsigned long -scm_ihash (SCM obj, unsigned long n) +uintptr_t +scm_ihash (SCM obj, uintptr_t n) { - return (unsigned long) scm_raw_ihash (obj, 10) % n; + return scm_raw_ihash (obj, 10) % n; } SCM_DEFINE (scm_hash, "hash", 2, 0, 0, @@ -442,8 +442,8 @@ SCM_DEFINE (scm_hash, "hash", 2, 0, 0, "integer in the range 0 to @var{size} - 1.") #define FUNC_NAME s_scm_hash { - unsigned long sz = scm_to_unsigned_integer (size, 1, ULONG_MAX); - return scm_from_ulong (scm_ihash (key, sz)); + uintptr_t sz = scm_to_unsigned_integer (size, 1, UINTPTR_MAX); + return scm_from_unsigned_integer (scm_ihash (key, sz)); } #undef FUNC_NAME diff --git a/libguile/hash.h b/libguile/hash.h index 0e82b4afc..580d2ce93 100644 --- a/libguile/hash.h +++ b/libguile/hash.h @@ -26,19 +26,19 @@ -SCM_INTERNAL unsigned long scm_i_locale_string_hash (const char *str, - size_t len); -SCM_INTERNAL unsigned long scm_i_latin1_string_hash (const char *str, - size_t len); -SCM_INTERNAL unsigned long scm_i_utf8_string_hash (const char *str, - size_t len); - -SCM_INTERNAL unsigned long scm_i_string_hash (SCM str); -SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n); +SCM_INTERNAL uintptr_t scm_i_locale_string_hash (const char *str, + size_t len); +SCM_INTERNAL uintptr_t scm_i_latin1_string_hash (const char *str, + size_t len); +SCM_INTERNAL uintptr_t scm_i_utf8_string_hash (const char *str, + size_t len); + +SCM_INTERNAL uintptr_t scm_i_string_hash (SCM str); +SCM_API uintptr_t scm_ihashq (SCM obj, uintptr_t n); SCM_API SCM scm_hashq (SCM obj, SCM n); -SCM_API unsigned long scm_ihashv (SCM obj, unsigned long n); +SCM_API uintptr_t scm_ihashv (SCM obj, uintptr_t n); SCM_API SCM scm_hashv (SCM obj, SCM n); -SCM_API unsigned long scm_ihash (SCM obj, unsigned long n); +SCM_API uintptr_t scm_ihash (SCM obj, uintptr_t n); SCM_API SCM scm_hash (SCM obj, SCM n); SCM_INTERNAL void scm_init_hash (void); diff --git a/libguile/strings.c b/libguile/strings.c index 5eebb3300..572c554c3 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -760,7 +760,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1 SCM -scm_i_make_symbol (SCM name, scm_t_bits flags, unsigned long hash) +scm_i_make_symbol (SCM name, scm_t_bits flags, uintptr_t hash) { SCM buf, symbol; size_t start, length = STRING_LENGTH (name); diff --git a/libguile/strings.h b/libguile/strings.h index f28ef3246..aa6a601be 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -250,7 +250,7 @@ SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); /* internal functions related to symbols. */ SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags, - unsigned long hash); + uintptr_t hash); SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym); SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym); SCM_INTERNAL size_t scm_i_symbol_length (SCM sym); diff --git a/libguile/symbols.c b/libguile/symbols.c index 292941e9d..b3ddab67d 100644 --- a/libguile/symbols.c +++ b/libguile/symbols.c @@ -71,8 +71,8 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0, /* {Symbols} */ -unsigned long -scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) +uintptr_t +scm_i_hash_symbol (SCM obj, uintptr_t n, void *closure) { return scm_i_symbol_hash (obj) % n; } @@ -80,7 +80,7 @@ scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) struct string_lookup_data { SCM string; - unsigned long string_hash; + uintptr_t string_hash; }; static int @@ -102,7 +102,7 @@ string_lookup_predicate_fn (SCM sym, void *closure) } static SCM -lookup_interned_symbol (SCM name, unsigned long raw_hash) +lookup_interned_symbol (SCM name, uintptr_t raw_hash) { struct string_lookup_data data; @@ -118,7 +118,7 @@ struct latin1_lookup_data { const char *str; size_t len; - unsigned long string_hash; + uintptr_t string_hash; }; static int @@ -134,7 +134,7 @@ latin1_lookup_predicate_fn (SCM sym, void *closure) static SCM lookup_interned_latin1_symbol (const char *str, size_t len, - unsigned long raw_hash) + uintptr_t raw_hash) { struct latin1_lookup_data data; @@ -151,7 +151,7 @@ struct utf8_lookup_data { const char *str; size_t len; - unsigned long string_hash; + uintptr_t string_hash; }; static int @@ -201,7 +201,7 @@ utf8_lookup_predicate_fn (SCM sym, void *closure) static SCM lookup_interned_utf8_symbol (const char *str, size_t len, - unsigned long raw_hash) + uintptr_t raw_hash) { struct utf8_lookup_data data; @@ -239,7 +239,7 @@ static SCM scm_i_str2symbol (SCM str) { SCM symbol; - unsigned long raw_hash = scm_i_string_hash (str); + uintptr_t raw_hash = scm_i_string_hash (str); symbol = lookup_interned_symbol (str, raw_hash); if (scm_is_true (symbol)) @@ -261,7 +261,7 @@ scm_i_str2symbol (SCM str) static SCM scm_i_str2uninterned_symbol (SCM str) { - unsigned long raw_hash = scm_i_string_hash (str); + uintptr_t raw_hash = scm_i_string_hash (str); return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash); } @@ -416,7 +416,7 @@ scm_from_latin1_symbol (const char *sym) SCM scm_from_latin1_symboln (const char *sym, size_t len) { - unsigned long hash; + uintptr_t hash; SCM ret; if (len == (size_t) -1) @@ -442,7 +442,7 @@ scm_from_utf8_symbol (const char *sym) SCM scm_from_utf8_symboln (const char *sym, size_t len) { - unsigned long hash; + uintptr_t hash; SCM ret; if (len == (size_t) -1) diff --git a/libguile/symbols.h b/libguile/symbols.h index e8bc3346f..f541f5126 100644 --- a/libguile/symbols.h +++ b/libguile/symbols.h @@ -31,7 +31,7 @@ #define scm_is_symbol(x) (SCM_HAS_TYP7 (x, scm_tc7_symbol)) -#define scm_i_symbol_hash(x) ((unsigned long) SCM_CELL_WORD_2 (x)) +#define scm_i_symbol_hash(x) ((uintptr_t) SCM_CELL_WORD_2 (x)) #define scm_i_symbol_is_interned(x) \ (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED)) @@ -122,8 +122,8 @@ SCM_API SCM scm_take_utf8_symboln (char *sym, size_t len); /* internal functions. */ -SCM_INTERNAL unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, - void *closure); +SCM_INTERNAL uintptr_t scm_i_hash_symbol (SCM obj, uintptr_t n, + void *closure); SCM_INTERNAL void scm_symbols_prehistory (void); SCM_INTERNAL void scm_init_symbols (void); -- 2.42.0