From mboxrd@z Thu Jan 1 00:00:00 1970 Path: news.gmane.org!not-for-mail From: Mark H Weaver Newsgroups: gmane.lisp.guile.devel Subject: Re: [PATCH] Efficient Gensym Hack (v2) Date: Tue, 06 Mar 2012 04:55:40 -0500 Message-ID: <87pqcqvysj.fsf@netris.org> References: <87mx7vx8zg.fsf@netris.org> NNTP-Posting-Host: plane.gmane.org Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=-=-=" X-Trace: dough.gmane.org 1331027924 10966 80.91.229.3 (6 Mar 2012 09:58:44 GMT) X-Complaints-To: usenet@dough.gmane.org NNTP-Posting-Date: Tue, 6 Mar 2012 09:58:44 +0000 (UTC) To: guile-devel@gnu.org Original-X-From: guile-devel-bounces+guile-devel=m.gmane.org@gnu.org Tue Mar 06 10:58:40 2012 Return-path: Envelope-to: guile-devel@m.gmane.org Original-Received: from lists.gnu.org ([208.118.235.17]) by plane.gmane.org with esmtp (Exim 4.69) (envelope-from ) id 1S4rAD-0007Ii-P4 for guile-devel@m.gmane.org; Tue, 06 Mar 2012 10:58:38 +0100 Original-Received: from localhost ([::1]:55281 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S4rAD-0005pj-2H for guile-devel@m.gmane.org; Tue, 06 Mar 2012 04:58:37 -0500 Original-Received: from eggs.gnu.org ([208.118.235.92]:41971) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S4rA4-0005pL-1V for guile-devel@gnu.org; Tue, 06 Mar 2012 04:58:35 -0500 Original-Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1S4r9i-0000JE-Gw for guile-devel@gnu.org; Tue, 06 Mar 2012 04:58:27 -0500 Original-Received: from world.peace.net ([96.39.62.75]:47220) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1S4r9h-0000Ip-EF for guile-devel@gnu.org; Tue, 06 Mar 2012 04:58:06 -0500 Original-Received: from 209-6-91-212.c3-0.smr-ubr1.sbo-smr.ma.cable.rcn.com ([209.6.91.212] helo=yeeloong) by world.peace.net with esmtpsa (TLS1.0:DHE_RSA_AES_128_CBC_SHA1:16) (Exim 4.72) (envelope-from ) id 1S4r9L-0002xF-OM; Tue, 06 Mar 2012 04:57:46 -0500 In-Reply-To: <87mx7vx8zg.fsf@netris.org> (Mark H. Weaver's message of "Mon, 05 Mar 2012 12:17:55 -0500") User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.0.92 (gnu/linux) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 3) X-Received-From: 96.39.62.75 X-BeenThere: guile-devel@gnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "Developers list for Guile, the GNU extensibility library" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: guile-devel-bounces+guile-devel=m.gmane.org@gnu.org Original-Sender: guile-devel-bounces+guile-devel=m.gmane.org@gnu.org Xref: news.gmane.org gmane.lisp.guile.devel:14024 Archived-At: --=-=-= Content-Type: text/plain Hello all, Here's an improved version of the Efficient Gensym Hack (v2). Mark --=-=-= Content-Type: text/x-patch Content-Disposition: inline; filename=0001-Don-t-lock-mutex-to-set-shared-flag-on-stringbuf-if-.patch Content-Description: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's already shared >From 5f558244261f3a22217d5136d0aebb7f644d7efb Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 5 Mar 2012 09:51:17 -0500 Subject: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's already shared * libguile/strings.c (set_stringbuf_shared): New internal static function to replace the macro SET_STRINGBUF_SHARED. The macro assumed that the stringbuf_write_mutex was already locked, but this new function handles locking internally, and avoids locking if the stringbuf is already shared. (SET_STRINGBUF_SHARED): Removed. (scm_i_make_string, scm_i_substring, scm_i_substring_read_only, scm_i_make_symbol, scm_i_symbol_substring): Use set_stringbuf_shared instead of SET_STRINGBUF_SHARED. --- libguile/strings.c | 41 ++++++++++++++++++----------------------- 1 files changed, 18 insertions(+), 23 deletions(-) diff --git a/libguile/strings.c b/libguile/strings.c index 494a658..35757f0 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -91,16 +91,6 @@ #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf)) -#define SET_STRINGBUF_SHARED(buf) \ - do \ - { \ - /* Don't modify BUF if it's already marked as shared since it might be \ - a read-only, statically allocated stringbuf. */ \ - if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \ - SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \ - } \ - while (0) - #ifdef SCM_STRING_LENGTH_HISTOGRAM static size_t lenhist[1001]; #endif @@ -227,6 +217,19 @@ narrow_stringbuf (SCM buf) scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; +static void +set_stringbuf_shared (SCM buf) +{ + /* Don't modify BUF if it's already marked as shared since it + might be a read-only, statically allocated stringbuf. */ + if (!STRINGBUF_SHARED (buf)) + { + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); + SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + } +} + /* Copy-on-write strings. */ @@ -276,7 +279,7 @@ scm_i_make_string (size_t len, char **charsp, int read_only_p) if (SCM_UNLIKELY (scm_is_false (null_stringbuf))) { null_stringbuf = make_stringbuf (0); - SET_STRINGBUF_SHARED (null_stringbuf); + set_stringbuf_shared (null_stringbuf); } buf = null_stringbuf; } @@ -341,9 +344,7 @@ scm_i_substring (SCM str, size_t start, size_t end) SCM buf; size_t str_start; get_str_buf_start (&str, &buf, &str_start); - scm_i_pthread_mutex_lock (&stringbuf_write_mutex); - SET_STRINGBUF_SHARED (buf); - scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + set_stringbuf_shared (buf); return scm_double_cell (STRING_TAG, SCM_UNPACK(buf), (scm_t_bits)str_start + start, (scm_t_bits) end - start); @@ -360,9 +361,7 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end) SCM buf; size_t str_start; get_str_buf_start (&str, &buf, &str_start); - scm_i_pthread_mutex_lock (&stringbuf_write_mutex); - SET_STRINGBUF_SHARED (buf); - scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + set_stringbuf_shared (buf); return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf), (scm_t_bits)str_start + start, (scm_t_bits) end - start); @@ -753,9 +752,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags, if (start == 0 && length == STRINGBUF_LENGTH (buf)) { /* reuse buf. */ - scm_i_pthread_mutex_lock (&stringbuf_write_mutex); - SET_STRINGBUF_SHARED (buf); - scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + set_stringbuf_shared (buf); } else { @@ -854,9 +851,7 @@ SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end) { SCM buf = SYMBOL_STRINGBUF (sym); - scm_i_pthread_mutex_lock (&stringbuf_write_mutex); - SET_STRINGBUF_SHARED (buf); - scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + set_stringbuf_shared (buf); return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), (scm_t_bits)start, (scm_t_bits) end - start); } -- 1.7.5.4 --=-=-= Content-Type: text/x-patch Content-Disposition: inline; filename=0002-Move-prototype-for-scm_i_try_narrow_string-where-it-.patch Content-Description: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it belongs >From 6c644645ecd2b1e84754b4759789edab2fdf9260 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 5 Mar 2012 10:06:34 -0500 Subject: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it belongs * libguile/strings.h (scm_i_try_narrow_string): Move prototype out of the "internal functions related to symbols" section. --- libguile/strings.h | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/libguile/strings.h b/libguile/strings.h index 42e57ac..9735913 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -195,10 +195,12 @@ SCM_INTERNAL const void *scm_i_string_data (SCM str); SCM_INTERNAL SCM scm_i_string_start_writing (SCM str); SCM_INTERNAL void scm_i_string_stop_writing (void); SCM_INTERNAL int scm_i_is_narrow_string (SCM str); +SCM_INTERNAL int scm_i_try_narrow_string (SCM str); SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c); SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr); SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); + /* internal functions related to symbols. */ SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags, @@ -210,7 +212,6 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym); SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym); SCM_INTERNAL size_t scm_i_symbol_length (SCM sym); SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str); -SCM_INTERNAL int scm_i_try_narrow_string (SCM str); SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end); SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x); SCM_INTERNAL void scm_encoding_error (const char *subr, int err, -- 1.7.5.4 --=-=-= Content-Type: text/x-patch Content-Disposition: inline; filename=0003-Efficient-gensym-hack-generate-gensym-names-lazily.patch Content-Description: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily >From 7eff2e5ee0230b11a1ad38b4fd1cf4a470a9b3bc Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 5 Mar 2012 10:35:06 -0500 Subject: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily * libguile/strings.c (scm_i_stringbuf_from_string, scm_i_string_from_stringbuf): New internal functions. (symbol_stringbuf): New internal static function to replace most uses of SYMBOL_STRINGBUF. Handles forcing lazy gensyms. (scm_i_symbol_length, scm_c_symbol_length, scm_i_is_narrow_symbol, scm_i_symbol_chars, scm_i_symbol_wide_chars, scm_i_symbol_substring, scm_sys_symbol_dump): Use symbol_stringbuf instead of SYMBOL_STRINGBUF. * libguile/strings.h (scm_i_stringbuf_from_string, scm_i_string_from_stringbuf): Add prototypes. * libguile/symbols.c (SYMBOL_STRINGBUF): New internal macro. (scm_i_symbol_hash): New internal function to replace the macro of the same name. Handles forcing lazy gensyms. (scm_gensym): Don't construct the name or even increment the gensym_counter here. Just return a special symbol with the SCM_I_F_SYMBOL_LAZY_GENSYM flag set, with hash value 0, and with a stringbuf containing only the prefix. (scm_i_force_lazy_gensym): New internal procedure used when a lazy gensym is queried for its name or hash value. (symbol_lookup_hash_fn, symbol_lookup_assoc_fn): Avoid lazy gensym checks. * libguile/symbols.h (scm_i_symbol_hash): Remove macro, and replace it with a prototype for the new internal function of the same name. (scm_i_force_lazy_gensym): Add prototype. (scm_i_symbol_is_lazy_gensym): New macro. (SCM_I_F_SYMBOL_LAZY_GENSYM): New flag. * doc/ref/api-data.texi (Symbol Primitives): Update documentation. * test-suite/tests/symbols.test (gensym): Add tests. --- doc/ref/api-data.texi | 4 +- libguile/strings.c | 58 +++++++++++++++-- libguile/strings.h | 2 + libguile/symbols.c | 140 ++++++++++++++++++++++++++++++++++------- libguile/symbols.h | 6 ++- test-suite/tests/symbols.test | 36 ++++++++++- 6 files changed, 211 insertions(+), 35 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 39c9790..a1203f0 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -5293,8 +5293,8 @@ code. The @code{gensym} primitive meets this need: @deffnx {C Function} scm_gensym (prefix) Create a new symbol with a name constructed from a prefix and a counter value. The string @var{prefix} can be specified as an optional -argument. Default prefix is @samp{@w{ g}}. The counter is increased by 1 -at each call. There is no provision for resetting the counter. +argument. Default prefix is @samp{@w{ g}}. The name is constructed +lazily, when the name or hash of the symbol is first requested. @end deffn The symbols generated by @code{gensym} are @emph{likely} to be unique, diff --git a/libguile/strings.c b/libguile/strings.c index 35757f0..b4f42d4 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -334,6 +334,41 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start) *buf = STRING_STRINGBUF (*str); } +/* scm_i_stringbuf_from_string returns a stringbuf containing exactly + the characters in 'str'. If possible, it returns 'str's stringbuf + (marking it shared). However, if 'str' refers to only part of its + stringbuf, the relevant portion is copied into a fresh stringbuf. + + This is needed by the lazy gensym code in symbols.c. */ +SCM +scm_i_stringbuf_from_string (SCM str) +{ + SCM inner_str, buf; + size_t len, start; + + len = STRING_LENGTH (str); + inner_str = str; + get_str_buf_start (&inner_str, &buf, &start); + if (STRINGBUF_LENGTH (buf) == len) + set_stringbuf_shared (buf); + else + { + SCM new_str = scm_i_substring_copy (str, 0, len); + buf = STRING_STRINGBUF (new_str); + } + return buf; +} + +/* Needed by the lazy gensym code in symbols.c. */ +SCM +scm_i_string_from_stringbuf (SCM buf) +{ + size_t len = STRINGBUF_LENGTH (buf); + set_stringbuf_shared (buf); + return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), + (scm_t_bits) 0, (scm_t_bits) len); +} + SCM scm_i_substring (SCM str, size_t start, size_t end) { @@ -732,8 +767,17 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) internals of strings and string-like objects confined to this file. */ +/* Must be kept in sync with the matching definition in symbols.c */ #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1 +static SCM +symbol_stringbuf (SCM symbol) +{ + if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol))) + scm_i_force_lazy_gensym (symbol); + return SYMBOL_STRINGBUF (symbol); +} + SCM scm_i_make_symbol (SCM name, scm_t_bits flags, unsigned long hash, SCM props) @@ -793,7 +837,7 @@ scm_i_c_make_symbol (const char *name, size_t len, size_t scm_i_symbol_length (SCM sym) { - return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); + return STRINGBUF_LENGTH (symbol_stringbuf (sym)); } size_t @@ -802,7 +846,7 @@ scm_c_symbol_length (SCM sym) { SCM_VALIDATE_SYMBOL (1, sym); - return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); + return STRINGBUF_LENGTH (symbol_stringbuf (sym)); } #undef FUNC_NAME @@ -813,7 +857,7 @@ scm_i_is_narrow_symbol (SCM sym) { SCM buf; - buf = SYMBOL_STRINGBUF (sym); + buf = symbol_stringbuf (sym); return !STRINGBUF_WIDE (buf); } @@ -824,7 +868,7 @@ scm_i_symbol_chars (SCM sym) { SCM buf; - buf = SYMBOL_STRINGBUF (sym); + buf = symbol_stringbuf (sym); if (!STRINGBUF_WIDE (buf)) return (const char *) STRINGBUF_CHARS (buf); else @@ -839,7 +883,7 @@ scm_i_symbol_wide_chars (SCM sym) { SCM buf; - buf = SYMBOL_STRINGBUF (sym); + buf = symbol_stringbuf (sym); if (STRINGBUF_WIDE (buf)) return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf); else @@ -850,7 +894,7 @@ scm_i_symbol_wide_chars (SCM sym) SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end) { - SCM buf = SYMBOL_STRINGBUF (sym); + SCM buf = symbol_stringbuf (sym); set_stringbuf_shared (buf); return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), (scm_t_bits)start, (scm_t_bits) end - start); @@ -1000,7 +1044,7 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), scm_from_ulong (scm_i_symbol_hash (sym))); e3 = scm_cons (scm_from_latin1_symbol ("interned"), scm_symbol_interned_p (sym)); - buf = SYMBOL_STRINGBUF (sym); + buf = symbol_stringbuf (sym); /* Stringbuf info */ if (!STRINGBUF_WIDE (buf)) diff --git a/libguile/strings.h b/libguile/strings.h index 9735913..afb5a53 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -200,6 +200,8 @@ SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c); SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr); SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); +SCM_INTERNAL SCM scm_i_stringbuf_from_string (SCM str); +SCM_INTERNAL SCM scm_i_string_from_stringbuf (SCM buf); /* internal functions related to symbols. */ diff --git a/libguile/symbols.c b/libguile/symbols.c index 08512a6..07556fa 100644 --- a/libguile/symbols.c +++ b/libguile/symbols.c @@ -70,6 +70,19 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0, /* {Symbols} */ +/* Must be kept in sync with the matching definition in strings.c */ +#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1 + +#define SYMBOL_HASH(x) ((unsigned long) SCM_CELL_WORD_2 (x)) + +unsigned long +scm_i_symbol_hash (SCM symbol) +{ + if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol))) + scm_i_force_lazy_gensym (symbol); + return SYMBOL_HASH (symbol); +} + unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, void *closure) { @@ -165,7 +178,16 @@ lookup_interned_latin1_symbol (const char *str, size_t len, static unsigned long symbol_lookup_hash_fn (SCM obj, unsigned long max, void *closure) { - return scm_i_symbol_hash (obj) % max; + /* We must avoid forcing lazy gensyms here, because + scm_i_force_lazy_gensym needs to intern its symbol before clearing + the lazy gensym flag. */ + return SYMBOL_HASH (obj) % max; +} + +static SCM +symbol_to_string_no_lazy_gensym_check (SCM sym) +{ + return scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym)); } static SCM @@ -175,9 +197,13 @@ symbol_lookup_assoc_fn (SCM obj, SCM alist, void *closure) { SCM sym = SCM_CAAR (alist); - if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (obj) - && scm_is_true (scm_string_equal_p (scm_symbol_to_string (sym), - scm_symbol_to_string (obj)))) + /* We must avoid forcing lazy gensyms here, because + scm_i_force_lazy_gensym needs to intern its symbol before + clearing the lazy gensym flag. */ + if (SYMBOL_HASH (sym) == SYMBOL_HASH (obj) + && scm_is_true (scm_string_equal_p + (symbol_to_string_no_lazy_gensym_check (sym), + symbol_to_string_no_lazy_gensym_check (obj)))) return SCM_CAR (alist); } @@ -340,38 +366,104 @@ SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0, /* The default prefix for `gensym'd symbols. */ static SCM default_gensym_prefix; -#define MAX_PREFIX_LENGTH 30 - SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0, (SCM prefix), "Create a new symbol with a name constructed from a prefix and\n" - "a counter value. The string @var{prefix} can be specified as\n" - "an optional argument. Default prefix is @code{ g}. The counter\n" - "is increased by 1 at each call. There is no provision for\n" - "resetting the counter.") + "a counter value. The string @var{prefix} can be specified as\n" + "an optional argument. Default prefix is @code{ g}. The name\n" + "is constructed lazily, when the name or hash of the symbol is\n" + "first requested.") #define FUNC_NAME s_scm_gensym { - static int gensym_counter = 0; - - SCM suffix, name; - int n, n_digits; - char buf[SCM_INTBUFLEN]; + SCM prefix_stringbuf; if (SCM_UNBNDP (prefix)) prefix = default_gensym_prefix; + else + SCM_VALIDATE_STRING (1, prefix); - /* mutex in case another thread looks and incs at the exact same moment */ - scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex); - n = gensym_counter++; - scm_i_pthread_mutex_unlock (&scm_i_misc_mutex); + prefix_stringbuf = scm_i_stringbuf_from_string (prefix); - n_digits = scm_iint2str (n, 10, buf); - suffix = scm_from_latin1_stringn (buf, n_digits); - name = scm_string_append (scm_list_2 (prefix, suffix)); - return scm_string_to_symbol (name); + /* Allocate a special symbol with the lazy gensym flag set. Except + for a few special exceptions, all code must check this flag before + accessing the name or hash fields of symbols. When the gensym is + forced, it will set the name and hash fields to their final values, + and then clear the lazy gensym flag. For now, we store the gensym + prefix as the symbol name, and 0 as the hash value. */ + return scm_double_cell (scm_tc7_symbol | SCM_I_F_SYMBOL_LAZY_GENSYM, + SCM_UNPACK (prefix_stringbuf), (scm_t_bits) 0, + SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL))); } #undef FUNC_NAME +/* + * Forcing lazy gensyms + * + * Here we must choose a name for our gensym and set its 'equal?' hash + * value to match its name. In most cases, we will simply append the + * current gensym counter to the prefix to form the name, increment the + * counter, and intern the symbol. However, there are some + * complications. + * + * The name we ultimately assign to the gensym _must_ not already be + * interned. To understand why, consider this scenario: Suppose the + * user asks for a lazy gensym with prefix "foo", and we assign it the + * number 6. Now suppose sometime later, but before the gensym is + * forced, the symbol 'foo6' is independently interned. Now we have two + * distinct symbols (in the sense of 'eq?'), both semantically interned, + * with the same name. This is a violation of the most fundamental + * property of symbols. + * + * Therefore, if the first counter value we try yields a name that has + * already been interned, we try the next counter value, and repeat + * until we successfully intern our symbol. Only then can we clear the + * lazy gensym flag and thereby allow the name and 'equal?' hash value + * to be accessed. + */ +void +scm_i_force_lazy_gensym (SCM sym) +{ + static int gensym_counter = 0; + + SCM prefix, suffix, name, handle; + int n, n_digits; + char buf[SCM_INTBUFLEN]; + + /* mutex in case another thread forces a gensym (possibly this one) */ + scm_i_pthread_mutex_lock (&symbols_lock); + if (SCM_LIKELY (scm_i_symbol_is_lazy_gensym (sym))) + { + prefix = scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym)); + + do + { + n = gensym_counter++; + + n_digits = scm_iint2str (n, 10, buf); + suffix = scm_from_latin1_stringn (buf, n_digits); + name = scm_string_append (scm_list_2 (prefix, suffix)); + + /* Set the name and hash to their candidate values. */ + SCM_SET_CELL_OBJECT_1 (sym, scm_i_stringbuf_from_string (name)); + SCM_SET_CELL_WORD_2 (sym, scm_i_string_hash (name)); + + /* Attempt to intern the symbol */ + handle = scm_hash_fn_create_handle_x (symbols, sym, SCM_UNDEFINED, + symbol_lookup_hash_fn, + symbol_lookup_assoc_fn, + NULL); + } while (SCM_UNLIKELY (!scm_is_eq (sym, SCM_CAR (handle)))); + + /* We must not clear the lazy gensym flag until our symbol has + been interned. The lock does not save us here, because another + thread could retrieve our gensym's name or hash outside of any + lock. */ + SCM_SET_CELL_WORD_0 (sym, (SCM_CELL_WORD_0 (sym) + & ~SCM_I_F_SYMBOL_LAZY_GENSYM)); + } + scm_i_pthread_mutex_unlock (&symbols_lock); +} + SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0, (SCM symbol), "Return a hash value for @var{symbol}.") diff --git a/libguile/symbols.h b/libguile/symbols.h index 6106f9e..b8fe997 100644 --- a/libguile/symbols.h +++ b/libguile/symbols.h @@ -28,11 +28,13 @@ #define scm_is_symbol(x) (!SCM_IMP (x) \ && (SCM_TYP7 (x) == scm_tc7_symbol)) -#define scm_i_symbol_hash(x) ((unsigned long) SCM_CELL_WORD_2 (x)) #define scm_i_symbol_is_interned(x) \ (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED)) +#define scm_i_symbol_is_lazy_gensym(x) \ + (SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_LAZY_GENSYM) #define SCM_I_F_SYMBOL_UNINTERNED 0x100 +#define SCM_I_F_SYMBOL_LAZY_GENSYM 0x200 @@ -90,8 +92,10 @@ SCM_API SCM scm_take_utf8_symboln (char *sym, size_t len); /* internal functions. */ +SCM_INTERNAL unsigned long scm_i_symbol_hash (SCM symbol); SCM_INTERNAL unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, void *closure); +SCM_INTERNAL void scm_i_force_lazy_gensym (SCM sym); SCM_INTERNAL void scm_symbols_prehistory (void); SCM_INTERNAL void scm_init_symbols (void); diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test index 6fbc6be..0dbb121 100644 --- a/test-suite/tests/symbols.test +++ b/test-suite/tests/symbols.test @@ -149,7 +149,41 @@ (symbol? (gensym (make-string 4000 #\!)))) (pass-if "accepts embedded NULs" - (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6))) + (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6)) + + (pass-if "accepts substring prefixes" + (let* ((prefix (substring "foobar" 1 4)) + (symbol (gensym prefix)) + (name (symbol->string symbol))) + (string= "oob" (substring name 0 3)))) + + (pass-if "accepts shared substring prefixes" + (let* ((prefix (substring/shared (string-copy "foobar") + 1 4)) + (symbol (gensym prefix)) + (name (symbol->string symbol))) + (string= "oob" (substring name 0 3)))) + + (pass-if "counter incremented lazily" + (let* ((s1 (gensym "")) + (s2 (gensym "")) + (s3 (gensym "")) + (s4 (gensym "")) + (s4-counter (string->number (symbol->string s4))) + (s1-counter (string->number (symbol->string s1)))) + (= s1-counter (1+ s4-counter)))) + + (pass-if "unaffected by mutation of prefix" + (let* ((prefix (string-copy "foo")) + (symbol (gensym prefix))) + (string-set! prefix 0 #\g) + (string= "foo" (substring (symbol->string symbol) 0 3)))) + + (pass-if "avoids existing interned symbols" + (let* ((n (1+ (string->number (symbol->string (gensym ""))))) + (colliding-symbol (string->symbol (number->string n))) + (symbol (gensym ""))) + (< n (string->number (symbol->string symbol)))))) (with-test-prefix "extended read syntax" (pass-if (equal? "#{}#" (object->string (string->symbol "")))) -- 1.7.5.4 --=-=-=--