unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* [PATCH] Efficient Gensym Hack
@ 2012-03-05 17:17 Mark H Weaver
  2012-03-05 21:52 ` Andy Wingo
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Mark H Weaver @ 2012-03-05 17:17 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 2233 bytes --]

Hello all,

Here's an implementation of the efficient gensym hack for stable-2.0.
It makes 'gensym' about 4.7 times faster on my Yeeloong.  Gensyms are
not given names or even numbers until they are asked for their names or
hash values (for 'equal?' hash tables only).

The first patch adds an optimization for strings that is important for
gensyms.  It avoids locking a mutex when setting the shared flag on a
stringbuf if the shared flag is already set.  This is important for
gensyms because when 'gensym' is called, it must save the stringbuf of
the prefix and set its shared flag.  In the common case where 'gensym'
is called many times with the same prefix, this avoids locking any
mutexes within most calls to 'gensym'.

The second patch is trivial and unrelated to the efficient gensym hack,
but I include it here to save everyone an additional recompile of
libguile.

The third patch actually implements the efficient gensym hack.  It was
made a bit hairier by two unfortunate facts:

1. The implementation of symbols is split between symbols.c and
strings.c, and the gensym hack needs the internals of both.  I had to
add some new internal functions, including one to make a stringbuf from
a string and one to make a string from a stringbuf.

2. The symbol table uses the symbols themselves as the keys.  This was
already hairy and inefficient: take a look at symbol_lookup_assoc_fn,
which has to convert symbols to strings (which involves allocation) to
implement the hash lookup!  However, it makes things even worse when
forcing lazy gensyms, because we must intern the gensym before clearing
its "lazy gensym flag".  This is necessary because if the name we chose
already belongs to a pre-existing interned symbol, we _must_ choose
another name, and we must prevent any other thread from getting our
gensym's name until after we have interned it.  This involved adding a
new internal function to get the name of a symbol without checking its
lazy gensym flag, for use by symbol_lookup_assoc_fn.  IMHO, it would be
much better to use a weak-value hash table, with strings as the keys and
symbols as the values.  Maybe we can do that for 2.2.

Anyway, here are the patches.  Comments and suggestions welcome.

    Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's already shared --]
[-- Type: text/x-patch, Size: 4335 bytes --]

From 5f558244261f3a22217d5136d0aebb7f644d7efb Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 09:51:17 -0500
Subject: [PATCH 1/3] Don't lock mutex to set shared flag on stringbuf if it's
 already shared

* libguile/strings.c (set_stringbuf_shared): New internal static
  function to replace the macro SET_STRINGBUF_SHARED.  The macro assumed
  that the stringbuf_write_mutex was already locked, but this new
  function handles locking internally, and avoids locking if the
  stringbuf is already shared.

  (SET_STRINGBUF_SHARED): Removed.

  (scm_i_make_string, scm_i_substring, scm_i_substring_read_only,
  scm_i_make_symbol, scm_i_symbol_substring): Use set_stringbuf_shared
  instead of SET_STRINGBUF_SHARED.
---
 libguile/strings.c |   41 ++++++++++++++++++-----------------------
 1 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/libguile/strings.c b/libguile/strings.c
index 494a658..35757f0 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -91,16 +91,6 @@
 
 #define STRINGBUF_LENGTH(buf)   (SCM_CELL_WORD_1 (buf))
 
-#define SET_STRINGBUF_SHARED(buf)					\
-  do									\
-    {									\
-      /* Don't modify BUF if it's already marked as shared since it might be \
-	 a read-only, statically allocated stringbuf.  */		\
-      if (SCM_LIKELY (!STRINGBUF_SHARED (buf)))				\
-	SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
-    }									\
-  while (0)
-
 #ifdef SCM_STRING_LENGTH_HISTOGRAM
 static size_t lenhist[1001];
 #endif
@@ -227,6 +217,19 @@ narrow_stringbuf (SCM buf)
 
 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
 
+static void
+set_stringbuf_shared (SCM buf)
+{
+  /* Don't modify BUF if it's already marked as shared since it
+     might be a read-only, statically allocated stringbuf.  */
+  if (!STRINGBUF_SHARED (buf))
+    {
+      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+      SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED);
+      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+    }
+}
+
 \f
 /* Copy-on-write strings.
  */
@@ -276,7 +279,7 @@ scm_i_make_string (size_t len, char **charsp, int read_only_p)
       if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
         {
           null_stringbuf = make_stringbuf (0);
-          SET_STRINGBUF_SHARED (null_stringbuf);
+          set_stringbuf_shared (null_stringbuf);
         }
       buf = null_stringbuf;
     }
@@ -341,9 +344,7 @@ scm_i_substring (SCM str, size_t start, size_t end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -360,9 +361,7 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end)
       SCM buf;
       size_t str_start;
       get_str_buf_start (&str, &buf, &str_start);
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
       return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
                               (scm_t_bits)str_start + start,
                               (scm_t_bits) end - start);
@@ -753,9 +752,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
   if (start == 0 && length == STRINGBUF_LENGTH (buf))
     {
       /* reuse buf. */
-      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-      SET_STRINGBUF_SHARED (buf);
-      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      set_stringbuf_shared (buf);
     }
   else
     {
@@ -854,9 +851,7 @@ SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
   SCM buf = SYMBOL_STRINGBUF (sym);
-  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-  SET_STRINGBUF_SHARED (buf);
-  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+  set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
 			  (scm_t_bits)start, (scm_t_bits) end - start);
 }
-- 
1.7.5.4


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it belongs --]
[-- Type: text/x-patch, Size: 1786 bytes --]

From 6c644645ecd2b1e84754b4759789edab2fdf9260 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 10:06:34 -0500
Subject: [PATCH 2/3] Move prototype for scm_i_try_narrow_string where it
 belongs

* libguile/strings.h (scm_i_try_narrow_string): Move prototype out of
  the "internal functions related to symbols" section.
---
 libguile/strings.h |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/libguile/strings.h b/libguile/strings.h
index 42e57ac..9735913 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -195,10 +195,12 @@ SCM_INTERNAL const void *scm_i_string_data (SCM str);
 SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
 SCM_INTERNAL void scm_i_string_stop_writing (void);
 SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
+SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+
 /* internal functions related to symbols. */
 
 SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
@@ -210,7 +212,6 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
 SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
 SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
 SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
-SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
 SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
 SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
 SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
-- 
1.7.5.4


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily --]
[-- Type: text/x-patch, Size: 16455 bytes --]

From 33cd595b883ab5e27ab410648bac89fab0459078 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Mon, 5 Mar 2012 10:35:06 -0500
Subject: [PATCH 3/3] Efficient gensym hack: generate gensym names lazily

* libguile/strings.c (scm_i_symbol_to_string_no_lazy_gensym_check,
  scm_i_stringbuf_from_string, scm_i_string_from_stringbuf): New
  internal functions needed by symbols.c.

  (symbol_stringbuf): New internal static function to replace most uses
  of SYMBOL_STRINGBUF.  Handles forcing lazy gensyms.

  (scm_i_symbol_length, scm_c_symbol_length, scm_i_is_narrow_symbol,
  scm_i_symbol_chars, scm_i_symbol_wide_chars, scm_i_symbol_substring,
  scm_sys_symbol_dump): Use symbol_stringbuf instead of
  SYMBOL_STRINGBUF.

* libguile/strings.h (scm_i_symbol_to_string_no_lazy_gensym_check,
  scm_i_stringbuf_from_string, scm_i_string_from_stringbuf): Add
  prototypes.

* libguile/symbols.c (scm_i_symbol_hash): New internal function to
  replace macro of the same name.  Handles forcing lazy gensyms.

  (scm_gensym): Don't construct the name or even increment the
  gensym_counter here.  Just return a new symbol with the
  SCM_I_F_SYMBOL_LAZY_GENSYM flag set, with hash value 0, and with a
  stringbuf containing only the prefix.

  (scm_i_force_lazy_gensym): New internal procedure used when a lazy
  gensym is queried for its name or hash value.

  (symbol_lookup_hash_fn, symbol_lookup_assoc_fn): Avoid lazy gensym
  checks.

* libguile/symbols.h (scm_i_symbol_hash): Remove macro, and replace it
  with a prototype for the new internal function of the same name.
  (scm_i_force_lazy_gensym): Add prototype.
  (scm_i_symbol_is_lazy_gensym): New macro.
  (SCM_I_F_SYMBOL_LAZY_GENSYM): New flag.

* doc/ref/api-data.texi (Symbol Primitives): Update documentation.

* test-suite/tests/symbols.test (gensym): Add tests.
---
 doc/ref/api-data.texi         |    4 +-
 libguile/strings.c            |   65 +++++++++++++++++++++++++---
 libguile/strings.h            |    3 +
 libguile/symbols.c            |   96 ++++++++++++++++++++++++++++++----------
 libguile/symbols.h            |    6 ++-
 test-suite/tests/symbols.test |   36 +++++++++++++++-
 6 files changed, 175 insertions(+), 35 deletions(-)

diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index 39c9790..a1203f0 100644
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -5293,8 +5293,8 @@ code.  The @code{gensym} primitive meets this need:
 @deffnx {C Function} scm_gensym (prefix)
 Create a new symbol with a name constructed from a prefix and a counter
 value.  The string @var{prefix} can be specified as an optional
-argument.  Default prefix is @samp{@w{ g}}.  The counter is increased by 1
-at each call.  There is no provision for resetting the counter.
+argument.  Default prefix is @samp{@w{ g}}.  The name is constructed
+lazily, when the name or hash of the symbol is first requested.
 @end deffn
 
 The symbols generated by @code{gensym} are @emph{likely} to be unique,
diff --git a/libguile/strings.c b/libguile/strings.c
index 35757f0..cc49c7f 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -334,6 +334,39 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
   *buf = STRING_STRINGBUF (*str);
 }
 
+/* This is needed by the lazy gensym code in symbols.c.
+   It produces a shared stringbuf (so it will not be mutated)
+   containing exactly the characters in 'str'.  If possible,
+   it uses 'str's stringbuf.  However, if 'str' refers to only
+   part of its stringbuf, the stringbuf must be copied. */
+SCM
+scm_i_stringbuf_from_string (SCM str)
+{
+  SCM inner_str, buf;
+  size_t len, start;
+
+  len = STRING_LENGTH (str);
+  inner_str = str;
+  get_str_buf_start (&inner_str, &buf, &start);
+  if (STRINGBUF_LENGTH (buf) == len)
+    set_stringbuf_shared (buf);
+  else
+    {
+      SCM new_str = scm_i_substring_copy (str, 0, len);
+      buf = STRING_STRINGBUF (new_str);
+    }
+  return buf;
+}
+
+/* This is needed by the lazy gensym code in symbols.c. */
+SCM
+scm_i_string_from_stringbuf (SCM buf)
+{
+  size_t len = STRINGBUF_LENGTH (buf);
+  return scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
+                          (scm_t_bits) 0, (scm_t_bits) len);
+}
+
 SCM
 scm_i_substring (SCM str, size_t start, size_t end)
 {
@@ -734,6 +767,14 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
 
 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
 
+static SCM
+symbol_stringbuf (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_STRINGBUF (symbol);
+}
+
 SCM
 scm_i_make_symbol (SCM name, scm_t_bits flags,
 		   unsigned long hash, SCM props)
@@ -793,7 +834,7 @@ scm_i_c_make_symbol (const char *name, size_t len,
 size_t
 scm_i_symbol_length (SCM sym)
 {
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 
 size_t
@@ -802,7 +843,7 @@ scm_c_symbol_length (SCM sym)
 {
   SCM_VALIDATE_SYMBOL (1, sym);
 
-  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
+  return STRINGBUF_LENGTH (symbol_stringbuf (sym));
 }
 #undef FUNC_NAME
 
@@ -813,7 +854,7 @@ scm_i_is_narrow_symbol (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   return !STRINGBUF_WIDE (buf);
 }
 
@@ -824,7 +865,7 @@ scm_i_symbol_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (!STRINGBUF_WIDE (buf))
     return (const char *) STRINGBUF_CHARS (buf);
   else
@@ -839,7 +880,7 @@ scm_i_symbol_wide_chars (SCM sym)
 {
   SCM buf;
 
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
   if (STRINGBUF_WIDE (buf))
     return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
   else
@@ -850,12 +891,22 @@ scm_i_symbol_wide_chars (SCM sym)
 SCM
 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
 {
-  SCM buf = SYMBOL_STRINGBUF (sym);
+  SCM buf = symbol_stringbuf (sym);
   set_stringbuf_shared (buf);
   return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
 			  (scm_t_bits)start, (scm_t_bits) end - start);
 }
 
+SCM
+scm_i_symbol_to_string_no_lazy_gensym_check (SCM sym)
+{
+  SCM buf = SYMBOL_STRINGBUF (sym);
+  size_t len = STRINGBUF_LENGTH (buf);
+  set_stringbuf_shared (buf);
+  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
+			  (scm_t_bits) 0, (scm_t_bits) len);
+}
+
 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint.  */
 scm_t_wchar
 scm_i_symbol_ref (SCM sym, size_t x)
@@ -1000,7 +1051,7 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
                  scm_from_ulong (scm_i_symbol_hash (sym)));
   e3 = scm_cons (scm_from_latin1_symbol ("interned"),
                  scm_symbol_interned_p (sym));
-  buf = SYMBOL_STRINGBUF (sym);
+  buf = symbol_stringbuf (sym);
 
   /* Stringbuf info */
   if (!STRINGBUF_WIDE (buf))
diff --git a/libguile/strings.h b/libguile/strings.h
index 9735913..5c51980 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -200,6 +200,8 @@ SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
+SCM_INTERNAL SCM scm_i_stringbuf_from_string (SCM str);
+SCM_INTERNAL SCM scm_i_string_from_stringbuf (SCM buf);
 
 /* internal functions related to symbols. */
 
@@ -213,6 +215,7 @@ SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
 SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
 SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
 SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
+SCM_INTERNAL SCM scm_i_symbol_to_string_no_lazy_gensym_check (SCM sym);
 SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
 SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
 				      const char *message, SCM port, SCM chr);
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 08512a6..31aa3a2 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -70,6 +70,16 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
 /* {Symbols}
  */
 
+#define SYMBOL_HASH(x) ((unsigned long) SCM_CELL_WORD_2 (x))
+
+unsigned long
+scm_i_symbol_hash (SCM symbol)
+{
+  if (SCM_UNLIKELY (scm_i_symbol_is_lazy_gensym (symbol)))
+    scm_i_force_lazy_gensym (symbol);
+  return SYMBOL_HASH (symbol);
+}
+
 unsigned long
 scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
 {
@@ -165,7 +175,10 @@ lookup_interned_latin1_symbol (const char *str, size_t len,
 static unsigned long
 symbol_lookup_hash_fn (SCM obj, unsigned long max, void *closure)
 {
-  return scm_i_symbol_hash (obj) % max;
+  /* We must avoid forcing lazy gensyms here, because
+     scm_i_force_lazy_gensym needs to intern its symbol before clearing
+     the lazy gensym flag. */
+  return SYMBOL_HASH (obj) % max;
 }
 
 static SCM
@@ -175,9 +188,13 @@ symbol_lookup_assoc_fn (SCM obj, SCM alist, void *closure)
     {
       SCM sym = SCM_CAAR (alist);
 
-      if (scm_i_symbol_hash (sym) == scm_i_symbol_hash (obj)
-          && scm_is_true (scm_string_equal_p (scm_symbol_to_string (sym),
-                                              scm_symbol_to_string (obj))))
+      /* We must avoid forcing lazy gensyms here, because
+         scm_i_force_lazy_gensym needs to intern its symbol before
+         clearing the lazy gensym flag. */
+      if (SYMBOL_HASH (sym) == SYMBOL_HASH (obj)
+          && scm_is_true (scm_string_equal_p
+                          (scm_i_symbol_to_string_no_lazy_gensym_check (sym),
+                           scm_i_symbol_to_string_no_lazy_gensym_check (obj))))
         return SCM_CAR (alist);
     }
 
@@ -340,38 +357,69 @@ SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
 /* The default prefix for `gensym'd symbols.  */
 static SCM default_gensym_prefix;
 
-#define MAX_PREFIX_LENGTH 30
-
 SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
             (SCM prefix),
 	    "Create a new symbol with a name constructed from a prefix and\n"
-	    "a counter value. The string @var{prefix} can be specified as\n"
-	    "an optional argument. Default prefix is @code{ g}.  The counter\n"
-	    "is increased by 1 at each call. There is no provision for\n"
-	    "resetting the counter.")
+	    "a counter value.  The string @var{prefix} can be specified as\n"
+	    "an optional argument.  Default prefix is @code{ g}.  The name\n"
+            "is constructed lazily, when the name or hash of the symbol is\n"
+            "first requested.")
 #define FUNC_NAME s_scm_gensym
 {
-  static int gensym_counter = 0;
-  
-  SCM suffix, name;
-  int n, n_digits;
-  char buf[SCM_INTBUFLEN];
+  SCM prefix_stringbuf;
 
   if (SCM_UNBNDP (prefix))
     prefix = default_gensym_prefix;
+  else
+    SCM_VALIDATE_STRING (1, prefix);
 
-  /* mutex in case another thread looks and incs at the exact same moment */
-  scm_i_scm_pthread_mutex_lock (&scm_i_misc_mutex);
-  n = gensym_counter++;
-  scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
-
-  n_digits = scm_iint2str (n, 10, buf);
-  suffix = scm_from_latin1_stringn (buf, n_digits);
-  name = scm_string_append (scm_list_2 (prefix, suffix));
-  return scm_string_to_symbol (name);
+  prefix_stringbuf = scm_i_stringbuf_from_string (prefix);
+  return scm_double_cell (scm_tc7_symbol | SCM_I_F_SYMBOL_LAZY_GENSYM,
+                          SCM_UNPACK (prefix_stringbuf), (scm_t_bits) 0,
+                          SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL)));
 }
 #undef FUNC_NAME
 
+void
+scm_i_force_lazy_gensym (SCM sym)
+{
+  static int gensym_counter = 0;
+
+  SCM prefix, suffix, name, handle;
+  int n, n_digits;
+  char buf[SCM_INTBUFLEN];
+
+  /* mutex in case another thread forces a gensym (possibly this one) */
+  scm_i_pthread_mutex_lock (&symbols_lock);
+  if (SCM_LIKELY (scm_i_symbol_is_lazy_gensym (sym)))
+    {
+      prefix = scm_i_string_from_stringbuf (SCM_CELL_OBJECT_1 (sym));
+      do
+        {
+          n = gensym_counter++;
+
+          n_digits = scm_iint2str (n, 10, buf);
+          suffix = scm_from_latin1_stringn (buf, n_digits);
+          name = scm_string_append (scm_list_2 (prefix, suffix));
+      
+          SCM_SET_CELL_OBJECT_1 (sym, scm_i_stringbuf_from_string (name));
+          SCM_SET_CELL_WORD_2   (sym, scm_i_string_hash (name));
+          handle = scm_hash_fn_create_handle_x (symbols, sym, SCM_UNDEFINED,
+                                                symbol_lookup_hash_fn,
+                                                symbol_lookup_assoc_fn,
+                                                NULL);
+        } while (SCM_UNLIKELY (!scm_is_eq (sym, SCM_CAR (handle))));
+
+      /* We must not clear the lazy gensym flag until we've found a name
+         that has not been previously interned, and all other cell words
+         contain their final values.  The lock does not save us here,
+         because symbols can be accessed without locking. */
+      SCM_SET_CELL_WORD_0 (sym, (SCM_CELL_WORD_0 (sym)
+                                 & ~SCM_I_F_SYMBOL_LAZY_GENSYM));
+    }
+  scm_i_pthread_mutex_unlock (&symbols_lock);
+}
+
 SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0, 
 	    (SCM symbol),
 	    "Return a hash value for @var{symbol}.")
diff --git a/libguile/symbols.h b/libguile/symbols.h
index 6106f9e..b8fe997 100644
--- a/libguile/symbols.h
+++ b/libguile/symbols.h
@@ -28,11 +28,13 @@
 
 #define scm_is_symbol(x)            (!SCM_IMP (x) \
                                      && (SCM_TYP7 (x) == scm_tc7_symbol))
-#define scm_i_symbol_hash(x)        ((unsigned long) SCM_CELL_WORD_2 (x))
 #define scm_i_symbol_is_interned(x) \
   (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED))
+#define scm_i_symbol_is_lazy_gensym(x) \
+  (SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_LAZY_GENSYM)
 
 #define SCM_I_F_SYMBOL_UNINTERNED   0x100
+#define SCM_I_F_SYMBOL_LAZY_GENSYM  0x200
 
 \f
 
@@ -90,8 +92,10 @@ SCM_API SCM scm_take_utf8_symboln (char *sym, size_t len);
 
 /* internal functions. */
 
+SCM_INTERNAL unsigned long scm_i_symbol_hash (SCM symbol);
 SCM_INTERNAL unsigned long scm_i_hash_symbol (SCM obj, unsigned long n,
 					 void *closure);
+SCM_INTERNAL void scm_i_force_lazy_gensym (SCM sym);
 
 SCM_INTERNAL void scm_symbols_prehistory (void);
 SCM_INTERNAL void scm_init_symbols (void);
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 6fbc6be..0dbb121 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -149,7 +149,41 @@
     (symbol? (gensym (make-string 4000 #\!))))
 
   (pass-if "accepts embedded NULs"
-    (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6)))
+    (> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6))
+
+  (pass-if "accepts substring prefixes"
+    (let* ((prefix (substring "foobar" 1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "accepts shared substring prefixes"
+    (let* ((prefix (substring/shared (string-copy "foobar")
+                                     1 4))
+           (symbol (gensym prefix))
+           (name (symbol->string symbol)))
+      (string= "oob" (substring name 0 3))))
+
+  (pass-if "counter incremented lazily"
+    (let* ((s1 (gensym ""))
+           (s2 (gensym ""))
+           (s3 (gensym ""))
+           (s4 (gensym ""))
+           (s4-counter (string->number (symbol->string s4)))
+           (s1-counter (string->number (symbol->string s1))))
+      (= s1-counter (1+ s4-counter))))
+
+  (pass-if "unaffected by mutation of prefix"
+    (let* ((prefix (string-copy "foo"))
+           (symbol (gensym prefix)))
+      (string-set! prefix 0 #\g)
+      (string= "foo" (substring (symbol->string symbol) 0 3))))
+
+  (pass-if "avoids existing interned symbols"
+    (let* ((n (1+ (string->number (symbol->string (gensym "")))))
+           (colliding-symbol (string->symbol (number->string n)))
+           (symbol (gensym "")))
+      (< n (string->number (symbol->string symbol))))))
 
 (with-test-prefix "extended read syntax"
   (pass-if (equal? "#{}#" (object->string (string->symbol ""))))
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-01-16 17:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-05 17:17 [PATCH] Efficient Gensym Hack Mark H Weaver
2012-03-05 21:52 ` Andy Wingo
2012-03-06  3:16   ` Mark H Weaver
2012-03-06  8:56     ` Andy Wingo
2012-03-06  9:55 ` [PATCH] Efficient Gensym Hack (v2) Mark H Weaver
2012-03-07 10:40   ` Andy Wingo
2012-03-07 16:43     ` Mark H Weaver
2012-03-07 17:25       ` Andy Wingo
2012-03-07 19:28         ` Mark H Weaver
2012-03-07 20:04           ` Andy Wingo
2013-01-16 17:25   ` Andy Wingo
2012-03-10 22:55 ` [PATCH] Efficient Gensym Hack Ludovic Courtès

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).