unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
From: Mark H Weaver <mhw@netris.org>
To: guile-devel@gnu.org
Subject: Re: [PATCH] Efficient Gensym Hack (v2)
Date: Tue, 06 Mar 2012 04:55:40 -0500	[thread overview]
Message-ID: <87pqcqvysj.fsf@netris.org> (raw)
In-Reply-To: <87mx7vx8zg.fsf@netris.org> (Mark H. Weaver's message of "Mon, 05 Mar 2012 12:17:55 -0500")

[-- Attachment #1: Type: text/plain, Size: 87 bytes --]

Hello all,

Here's an improved version of the Efficient Gensym Hack (v2).

     Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's already shared --]
[-- Type: text/x-patch, Size: 4335 bytes --]

From 5f558244261f3a22217d5136d0aebb7f644d7efb Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 09:51:17 -0500
Subject: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's
 already shared

* libguile/strings.c (set_stringbuf_shared): New internal static
  function to replace the macro SET_STRINGBUF_SHARED.  The macro assumed
  that the stringbuf_write_mutex was already locked, but this new
  function handles locking internally, and avoids locking if the
  stringbuf is already shared.

  (SET_STRINGBUF_SHARED): Removed.

  (scm_i_make_string, scm_i_substring, scm_i_substring_read_only,
  scm_i_make_symbol, scm_i_symbol_substring): Use set_stringbuf_shared
  instead of SET_STRINGBUF_SHARED.
---
 libguile/strings.c |   41 ++++++++++++++++++-----------------------
 1 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/libguile/strings.c b/libguile/strings.c
index 494a658..35757f0 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -91,16 +91,6 @@
 
 #define STRINGBUF_LENGTH(buf)   (SCM_CELL_WORD_1 (buf))
 
-#define SET_STRINGBUF_SHARED(buf)					\
-  do									\
-    {									\
-      /* Don't modify BUF if it's already marked as shared since it might be \
-	 a read-only, statically allocated stringbuf.  */		\
-      if (SCM_LIKELY (!STRINGBUF_SHARED (buf)))				\
-	SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
-    }									\
-  while (0)
-
 #ifdef SCM_STRING_LENGTH_HISTOGRAM
 static size_t lenhist[1001];
 #endif
@@ -227,6 +217,19 @@ narrow_stringbuf (SCM buf)
 
 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
 
+static void
+set_stringbuf_shared (SCM buf)
+{
+  /* Don't modify BUF if it's already marked as shared since it
+     might be a read-only, statically allocated stringbuf.  */
+  if (!STRINGBUF_SHARED (buf))
+    {
+      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+      SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED);
+      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+    }
+}
+
 \f
 /* Copy-on-write strings.
  */
@@ -276,7 +279,7 @@ scm_i_make_string (size_t len, char **charsp, int read_only_p)
       if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
         {
           null_stringbuf = make_stringbuf (0);
-          SET_STRINGBUF_SHARED (null_stringbuf);
+          set_stringbuf_shared (null_stringbuf);
         }
       buf = null_stringbuf;
     }
@@ -341,9 +344,7 @@ scm_i_substring (SCM str, size_t start, size_t end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -360,9 +361,7 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -753,9 +752,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
   if (start == 0 && length == STRINGBUF_LENGTH (buf))
     {
       /* reuse buf. */
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
     }
   else
     {
@@ -854,9 +851,7 @@ SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
   SCM buf = SYMBOL_STRINGBUF (sym);
-  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-  SET_STRINGBUF_SHARED (buf);
-  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+  set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
 			  (scm_t_bits)start, (scm_t_bits) end - start);
 }
-- 
1.7.5.4


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it belongs --]
[-- Type: text/x-patch, Size: 1786 bytes --]

From 6c644645ecd2b1e84754b4759789edab2fdf9260 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 10:06:34 -0500
Subject: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it
 belongs

* libguile/strings.h (scm_i_try_narrow_string): Move prototype out of
  the "internal functions related to symbols" section.
---
 libguile/strings.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/libguile/strings.h b/libguile/strings.h
index 42e57ac..9735913 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -195,10 +195,12 @@ SCM_INTERNAL const void *scm_i_string_data (SCM str);
 SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
 SCM_INTERNAL void scm_i_string_stop_writing (void);
 SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
+SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+
 /* internal functions related to symbols. */
 
 SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
@@ -210,7 +212,6 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
 SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
 SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
 SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
-SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
 SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
 SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
-- 
1.7.5.4


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily --]
[-- Type: text/x-patch, Size: 17555 bytes --]

From 7eff2e5ee0230b11a1ad38b4fd1cf4a470a9b3bc Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 10:35:06 -0500
Subject: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily

* libguile/strings.c (scm_i_stringbuf_from_string,
  scm_i_string_from_stringbuf): New internal functions.

  (symbol_stringbuf): New internal static function to replace most uses
  of SYMBOL_STRINGBUF.  Handles forcing lazy gensyms.

  (scm_i_symbol_length, scm_c_symbol_length, scm_i_is_narrow_symbol,
  scm_i_symbol_chars, scm_i_symbol_wide_chars, scm_i_symbol_substring,
  scm_sys_symbol_dump): Use symbol_stringbuf instead of
  SYMBOL_STRINGBUF.

* libguile/strings.h (scm_i_stringbuf_from_string,
  scm_i_string_from_stringbuf): Add prototypes.

* libguile/symbols.c (SYMBOL_STRINGBUF): New internal macro.

  (scm_i_symbol_hash): New internal function to replace the macro of the
  same name.  Handles forcing lazy gensyms.

  (scm_gensym): Don't construct the name or even increment the
  gensym_counter here.  Just return a special symbol with the
  SCM_I_F_SYMBOL_LAZY_GENSYM flag set, with hash value 0, and with a
  stringbuf containing only the prefix.

  (scm_i_force_lazy_gensym): New internal procedure used when a lazy
  gensym is queried for its name or hash value.

  (symbol_lookup_hash_fn, symbol_lookup_assoc_fn): Avoid lazy gensym
  checks.

* libguile/symbols.h (scm_i_symbol_hash): Remove macro, and replace it
  with a prototype for the new internal function of the same name.
  (scm_i_force_lazy_gensym): Add prototype.
  (scm_i_symbol_is_lazy_gensym): New macro.
  (SCM_I_F_SYMBOL_LAZY_GENSYM): New flag.

* doc/ref/api-data.texi (Symbol Primitives): Update documentation.

* test-suite/tests/symbols.test (gensym): Add tests.
---
 doc/ref/api-data.texi         |    4 +-
 libguile/strings.c            |   58 +++++++++++++++--
 libguile/strings.h            |    2 +
 libguile/symbols.c            |  140 ++++++++++++++++++++++++++++++++++-------
 libguile/symbols.h            |    6 ++-
 test-suite/tests/symbols.test |   36 ++++++++++-
 6 files changed, 211 insertions(+), 35 deletions(-)

diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index 39c9790..a1203f0 100644
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -5293,8 +5293,8 @@ code.  The @code{gensym} primitive meets this need:
 @deffnx {C Function} scm_gensym (prefix)
 Create a new symbol with a name constructed from a prefix and a counter
 value.  The string @var{prefix} can be specified as an optional
-argument.  Default prefix is @samp{@w{ g}}.  The counter is increased by 1
-at each call.  There is no provision for resetting the counter.
+argument.  Default prefix is @samp{@w{ g}}.  The name is constructed
+lazily, when the name or hash of the symbol is first requested.
 @end deffn
 
 The symbols generated by @code{gensym} are @emph{likely} to be unique,
diff --git a/libguile/strings.c b/libguile/strings.c
index 35757f0..b4f42d4 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -334,6 +334,41 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
   *buf = STRING_STRINGBUF (*str);
 }
 
+/* scm_i_stringbuf_from_string returns a stringbuf containing exactly
+   the characters in 'str'.  If possible, it returns 'str's stringbuf
+   (marking it shared).  However, if 'str' refers to only part of its
+   stringbuf, the relevant portion is copied into a fresh stringbuf.
+
+   This is needed by the lazy gensym code in symbols.c. */
+SCM
+scm_i_stringbuf_from_string (SCM str)
+{
+  SCM inner_str, buf;
+  size_t len, start;
+
+  len = STRING_LENGTH (str);
+  inner_str = str;
+  get_str_buf_start (&inner_str, &buf, &start);
+  if (STRINGBUF_LENGTH (buf) == len)
+    set_stringbuf_shared (buf);
+  else
+    {
+      SCM new_str = scm_i_substring_copy (str, 0, len);
+      buf = STRING_STRINGBUF (new_str);
+    }
+  return buf;
+}
+
+/* Needed by the lazy gensym code in symbols.c. */
+SCM
+scm_i_string_from_stringbuf (SCM buf)
+{
+  size_t len = STRINGBUF_LENGTH (buf);
+  set_stringbuf_shared (buf);
+  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
+                          (scm_t_bits) 0, (scm_t_bits) len);
+}
+
 SCM
 scm_i_substring (SCM str, size_t start, size_t end)
 {
@@ -732,8 +767,17 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
    internals of strings and string-like objects confined to this file.
 */
 
+/* Must be kept in sync with the matching definition in symbols.c */
 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
 
+static SCM
+symbol_stringbuf (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_STRINGBUF (symbol);
+}
+
 SCM
 scm_i_make_symbol (SCM name, scm_t_bits flags,
 		   unsigned long hash, SCM props)
@@ -793,7 +837,7 @@ scm_i_c_make_symbol (const char *name, size_t len,
 size_t
 scm_i_symbol_length (SCM sym)
 {
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 
 size_t
@@ -802,7 +846,7 @@ scm_c_symbol_length (SCM sym)
 {
   SCM_VALIDATE_SYMBOL (1, sym);
 
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 #undef FUNC_NAME
 
@@ -813,7 +857,7 @@ scm_i_is_narrow_symbol (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   return !STRINGBUF_WIDE (buf);
 }
 
@@ -824,7 +868,7 @@ scm_i_symbol_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (!STRINGBUF_WIDE (buf))
     return (const char *) STRINGBUF_CHARS (buf);
   else
@@ -839,7 +883,7 @@ scm_i_symbol_wide_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (STRINGBUF_WIDE (buf))
     return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
   else
@@ -850,7 +894,7 @@ scm_i_symbol_wide_chars (SCM sym)
 SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
-  SCM buf = SYMBOL_STRINGBUF (sym);
+  SCM buf = symbol_stringbuf (sym);
   set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
 			  (scm_t_bits)start, (scm_t_bits) end - start);
@@ -1000,7 +1044,7 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
                  scm_from_ulong (scm_i_symbol_hash (sym)));
   e3 = scm_cons (scm_from_latin1_symbol ("interned"),
                  scm_symbol_interned_p (sym));
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
 
   /* Stringbuf info */
   if (!STRINGBUF_WIDE (buf))
diff --git a/libguile/strings.h b/libguile/strings.h
index 9735913..afb5a53 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -200,6 +200,8 @@ SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+SCM_INTERNAL SCM scm_i_stringbuf_from_string (SCM str);
+SCM_INTERNAL SCM scm_i_string_from_stringbuf (SCM buf);
 
 /* internal functions related to symbols. */
 
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 08512a6..07556fa 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -70,6 +70,19 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
 /* {Symbols}
  */
 
+/* Must be kept in sync with the matching definition in strings.c */
+#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
+
+#define SYMBOL_HASH(x) ((unsigned long) SCM_CELL_WORD_2 (x))
+
+unsigned long
+scm_i_symbol_hash (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_HASH (symbol);
+}
+
 unsigned long
 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
 {
@@ -165,7 +178,16 @@ lookup_interned_latin1_symbol (const char *str, size_t len,
 static unsigned long
 symbol_lookup_hash_fn (SCM obj, unsigned long max, void *closure)
 {
-  return scm_i_symbol_hash (obj) % max;
+  /* We must avoid forcing lazy gensyms here, because
+     scm_i_force_lazy_gensym needs to intern its symbol before clearing
+     the lazy gensym flag. */
+  return SYMBOL_HASH (obj) % max;
+}
+
+static SCM
+symbol_to_string_no_lazy_gensym_check (SCM sym)
+{
+  return scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym));
 }
 
 static SCM
@@ -175,9 +197,13 @@ symbol_lookup_assoc_fn (SCM obj, SCM alist, void *closure)
     {
       SCM sym = SCM_CAAR (alist);
 
-      if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (obj)
-          && scm_is_true (scm_string_equal_p (scm_symbol_to_string (sym),
-                                              scm_symbol_to_string (obj))))
+      /* We must avoid forcing lazy gensyms here, because
+         scm_i_force_lazy_gensym needs to intern its symbol before
+         clearing the lazy gensym flag. */
+      if (SYMBOL_HASH (sym) == SYMBOL_HASH (obj)
+          && scm_is_true (scm_string_equal_p
+                          (symbol_to_string_no_lazy_gensym_check (sym),
+                           symbol_to_string_no_lazy_gensym_check (obj))))
         return SCM_CAR (alist);
     }
 
@@ -340,38 +366,104 @@ SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
 /* The default prefix for `gensym'd symbols.  */
 static SCM default_gensym_prefix;
 
-#define MAX_PREFIX_LENGTH 30
-
 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
             (SCM prefix),
 	    "Create a new symbol with a name constructed from a prefix and\n"
-	    "a counter value. The string @var{prefix} can be specified as\n"
-	    "an optional argument. Default prefix is @code{ g}.  The counter\n"
-	    "is increased by 1 at each call. There is no provision for\n"
-	    "resetting the counter.")
+	    "a counter value.  The string @var{prefix} can be specified as\n"
+	    "an optional argument.  Default prefix is @code{ g}.  The name\n"
+            "is constructed lazily, when the name or hash of the symbol is\n"
+            "first requested.")
 #define FUNC_NAME s_scm_gensym
 {
-  static int gensym_counter = 0;
-  
-  SCM suffix, name;
-  int n, n_digits;
-  char buf[SCM_INTBUFLEN];
+  SCM prefix_stringbuf;
 
   if (SCM_UNBNDP (prefix))
     prefix = default_gensym_prefix;
+  else
+    SCM_VALIDATE_STRING (1, prefix);
 
-  /* mutex in case another thread looks and incs at the exact same moment */
-  scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
-  n = gensym_counter++;
-  scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
+  prefix_stringbuf = scm_i_stringbuf_from_string (prefix);
 
-  n_digits = scm_iint2str (n, 10, buf);
-  suffix = scm_from_latin1_stringn (buf, n_digits);
-  name = scm_string_append (scm_list_2 (prefix, suffix));
-  return scm_string_to_symbol (name);
+  /* Allocate a special symbol with the lazy gensym flag set.  Except
+     for a few special exceptions, all code must check this flag before
+     accessing the name or hash fields of symbols.  When the gensym is
+     forced, it will set the name and hash fields to their final values,
+     and then clear the lazy gensym flag.  For now, we store the gensym
+     prefix as the symbol name, and 0 as the hash value. */
+  return scm_double_cell (scm_tc7_symbol | SCM_I_F_SYMBOL_LAZY_GENSYM,
+                          SCM_UNPACK (prefix_stringbuf), (scm_t_bits) 0,
+                          SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL)));
 }
 #undef FUNC_NAME
 
+/*
+ * Forcing lazy gensyms
+ *
+ * Here we must choose a name for our gensym and set its 'equal?' hash
+ * value to match its name.  In most cases, we will simply append the
+ * current gensym counter to the prefix to form the name, increment the
+ * counter, and intern the symbol.  However, there are some
+ * complications.
+ *
+ * The name we ultimately assign to the gensym _must_ not already be
+ * interned.  To understand why, consider this scenario: Suppose the
+ * user asks for a lazy gensym with prefix "foo", and we assign it the
+ * number 6.  Now suppose sometime later, but before the gensym is
+ * forced, the symbol 'foo6' is independently interned.  Now we have two
+ * distinct symbols (in the sense of 'eq?'), both semantically interned,
+ * with the same name.  This is a violation of the most fundamental
+ * property of symbols.
+ *
+ * Therefore, if the first counter value we try yields a name that has
+ * already been interned, we try the next counter value, and repeat
+ * until we successfully intern our symbol.  Only then can we clear the
+ * lazy gensym flag and thereby allow the name and 'equal?' hash value
+ * to be accessed.
+ */
+void
+scm_i_force_lazy_gensym (SCM sym)
+{
+  static int gensym_counter = 0;
+
+  SCM prefix, suffix, name, handle;
+  int n, n_digits;
+  char buf[SCM_INTBUFLEN];
+
+  /* mutex in case another thread forces a gensym (possibly this one) */
+  scm_i_pthread_mutex_lock (&symbols_lock);
+  if (SCM_LIKELY (scm_i_symbol_is_lazy_gensym (sym)))
+    {
+      prefix = scm_i_string_from_stringbuf (SYMBOL_STRINGBUF (sym));
+
+      do
+        {
+          n = gensym_counter++;
+
+          n_digits = scm_iint2str (n, 10, buf);
+          suffix = scm_from_latin1_stringn (buf, n_digits);
+          name = scm_string_append (scm_list_2 (prefix, suffix));
+
+          /* Set the name and hash to their candidate values. */
+          SCM_SET_CELL_OBJECT_1 (sym, scm_i_stringbuf_from_string (name));
+          SCM_SET_CELL_WORD_2   (sym, scm_i_string_hash (name));
+
+          /* Attempt to intern the symbol */
+          handle = scm_hash_fn_create_handle_x (symbols, sym, SCM_UNDEFINED,
+                                                symbol_lookup_hash_fn,
+                                                symbol_lookup_assoc_fn,
+                                                NULL);
+        } while (SCM_UNLIKELY (!scm_is_eq (sym, SCM_CAR (handle))));
+
+      /* We must not clear the lazy gensym flag until our symbol has
+         been interned.  The lock does not save us here, because another
+         thread could retrieve our gensym's name or hash outside of any
+         lock. */
+      SCM_SET_CELL_WORD_0 (sym, (SCM_CELL_WORD_0 (sym)
+                                 & ~SCM_I_F_SYMBOL_LAZY_GENSYM));
+    }
+  scm_i_pthread_mutex_unlock (&symbols_lock);
+}
+
 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0, 
 	    (SCM symbol),
 	    "Return a hash value for @var{symbol}.")
diff --git a/libguile/symbols.h b/libguile/symbols.h
index 6106f9e..b8fe997 100644
--- a/libguile/symbols.h
+++ b/libguile/symbols.h
@@ -28,11 +28,13 @@
 
 #define scm_is_symbol(x)            (!SCM_IMP (x) \
                                      && (SCM_TYP7 (x) == scm_tc7_symbol))
-#define scm_i_symbol_hash(x)        ((unsigned long) SCM_CELL_WORD_2 (x))
 #define scm_i_symbol_is_interned(x) \
   (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED))
+#define scm_i_symbol_is_lazy_gensym(x) \
+  (SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_LAZY_GENSYM)
 
 #define SCM_I_F_SYMBOL_UNINTERNED   0x100
+#define SCM_I_F_SYMBOL_LAZY_GENSYM  0x200
 
 \f
 
@@ -90,8 +92,10 @@ SCM_API SCM scm_take_utf8_symboln (char *sym, size_t len);
 
 /* internal functions. */
 
+SCM_INTERNAL unsigned long scm_i_symbol_hash (SCM symbol);
 SCM_INTERNAL unsigned long scm_i_hash_symbol (SCM obj, unsigned long n,
 					 void *closure);
+SCM_INTERNAL void scm_i_force_lazy_gensym (SCM sym);
 
 SCM_INTERNAL void scm_symbols_prehistory (void);
 SCM_INTERNAL void scm_init_symbols (void);
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 6fbc6be..0dbb121 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -149,7 +149,41 @@
     (symbol? (gensym (make-string 4000 #\!))))
 
   (pass-if "accepts embedded NULs"
-    (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6)))
+    (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6))
+
+  (pass-if "accepts substring prefixes"
+    (let* ((prefix (substring "foobar" 1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "accepts shared substring prefixes"
+    (let* ((prefix (substring/shared (string-copy "foobar")
+                                     1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "counter incremented lazily"
+    (let* ((s1 (gensym ""))
+           (s2 (gensym ""))
+           (s3 (gensym ""))
+           (s4 (gensym ""))
+           (s4-counter (string->number (symbol->string s4)))
+           (s1-counter (string->number (symbol->string s1))))
+      (= s1-counter (1+ s4-counter))))
+
+  (pass-if "unaffected by mutation of prefix"
+    (let* ((prefix (string-copy "foo"))
+           (symbol (gensym prefix)))
+      (string-set! prefix 0 #\g)
+      (string= "foo" (substring (symbol->string symbol) 0 3))))
+
+  (pass-if "avoids existing interned symbols"
+    (let* ((n (1+ (string->number (symbol->string (gensym "")))))
+           (colliding-symbol (string->symbol (number->string n)))
+           (symbol (gensym "")))
+      (< n (string->number (symbol->string symbol))))))
 
 (with-test-prefix "extended read syntax"
   (pass-if (equal? "#{}#" (object->string (string->symbol ""))))
-- 
1.7.5.4


  parent reply	other threads:[~2012-03-06  9:55 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-05 17:17 [PATCH] Efficient Gensym Hack Mark H Weaver
2012-03-05 21:52 ` Andy Wingo
2012-03-06  3:16   ` Mark H Weaver
2012-03-06  8:56     ` Andy Wingo
2012-03-06  9:55 ` Mark H Weaver [this message]
2012-03-07 10:40   ` [PATCH] Efficient Gensym Hack (v2) Andy Wingo
2012-03-07 16:43     ` Mark H Weaver
2012-03-07 17:25       ` Andy Wingo
2012-03-07 19:28         ` Mark H Weaver
2012-03-07 20:04           ` Andy Wingo
2013-01-16 17:25   ` Andy Wingo
2012-03-10 22:55 ` [PATCH] Efficient Gensym Hack Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87pqcqvysj.fsf@netris.org \
    --to=mhw@netris.org \
    --cc=guile-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).