unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* [BDW-GC] Static cell/string/symbol allocation
@ 2009-01-06  0:02 Ludovic Courtès
  2009-01-06  2:11 ` Ken Raeburn
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Ludovic Courtès @ 2009-01-06  0:02 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 860 bytes --]

Hello,

I modified <snarf.h> in the BDW-GC branch to transparently have all
`SCM_SYMBOL ()' invocations use a statically allocated stringbuf.  The
symbol itself still has to be interned then so for simplicity the
implementation statically allocates an immutable string and then uses
`string->symbol' at initialization time to create an interned symbol
(which reuses the string's stringbuf).

The idea could be applied to other types in <snarf.h>, but most of them
require an initialization phase (e.g., subrs are assigned a number at
run-time).

Alas, there's no portable way that I know of to ask the compiler to
align double cells on 8-byte boundaries so that Guile actually
recognizes them as cells.  GCC's `aligned' attribute does the job, but
is not portable.  So this can't be committed, unless someone comes up
with a bright idea.  :-)

Thanks,
Ludo'.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: The patch --]
[-- Type: text/x-patch, Size: 4990 bytes --]

diff --git a/libguile/_scm.h b/libguile/_scm.h
index 6b728be..08c58e5 100644
--- a/libguile/_scm.h
+++ b/libguile/_scm.h
@@ -58,6 +58,7 @@
 #include "libguile/variable.h"
 #include "libguile/modules.h"
 #include "libguile/inline.h"
+#include "libguile/strings.h"
 
 /* SCM_SYSCALL retries system calls that have been interrupted (EINTR).
    However this can be avoided if the operating system can restart
diff --git a/libguile/snarf.h b/libguile/snarf.h
index 5c2f187..f1fdede 100644
--- a/libguile/snarf.h
+++ b/libguile/snarf.h
@@ -3,7 +3,7 @@
 #ifndef SCM_SNARF_H
 #define SCM_SNARF_H
 
-/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002, 2003, 2004, 2006, 2009 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -173,13 +173,17 @@ scm_c_define_subr_with_generic (RANAME, TYPE, \
 SCM_SNARF_HERE(static const char RANAME[]=STR)\
 SCM_SNARF_INIT(scm_make_synt (RANAME, TYPE, CFN))
 
-#define SCM_SYMBOL(c_name, scheme_name) \
-SCM_SNARF_HERE(static SCM c_name) \
-SCM_SNARF_INIT(c_name = scm_permanent_object (scm_from_locale_symbol (scheme_name)))
+#define SCM_SYMBOL(c_name, scheme_name)					\
+SCM_SNARF_HERE(								\
+  SCM_IMMUTABLE_STRING (c_name ## _string, scheme_name);		\
+  static SCM c_name)							\
+SCM_SNARF_INIT(c_name = scm_string_to_symbol (c_name ## _string))
 
-#define SCM_GLOBAL_SYMBOL(c_name, scheme_name) \
-SCM_SNARF_HERE(SCM c_name) \
-SCM_SNARF_INIT(c_name = scm_permanent_object (scm_from_locale_symbol (scheme_name)))
+#define SCM_GLOBAL_SYMBOL(c_name, scheme_name)				\
+SCM_SNARF_HERE(								\
+  SCM_IMMUTABLE_STRING (c_name ## _string, scheme_name);		\
+  SCM c_name)								\
+SCM_SNARF_INIT(c_name = scm_string_to_symbol (c_name ## _string))
 
 #define SCM_KEYWORD(c_name, scheme_name) \
 SCM_SNARF_HERE(static SCM c_name) \
@@ -269,6 +273,35 @@ SCM_SNARF_INIT(scm_set_smob_apply((tag), (c_name), (req), (opt), (rest));)
 SCM_SNARF_HERE(SCM c_name arglist) \
 SCM_SNARF_INIT(scm_set_smob_apply((tag), (c_name), (req), (opt), (rest));)
 
+\f
+/* Low-level snarfing.  */
+
+#define SCM_IMMUTABLE_DOUBLE_CELL(c_name, car, cbr, ccr, cdr)		\
+  static SCM_UNUSED const scm_t_cell c_name ## _raw_cell [2]		\
+    __attribute__ ((__aligned__ ((8)))) =				\
+    {									\
+      { SCM_PACK (car), SCM_PACK (cbr) },				\
+      { SCM_PACK (ccr), SCM_PACK (cdr) }				\
+    };									\
+  static SCM_UNUSED SCM c_name = SCM_PACK (& c_name ## _raw_cell)
+
+#define SCM_IMMUTABLE_STRINGBUF(c_name, contents)			\
+  SCM_IMMUTABLE_DOUBLE_CELL (c_name,					\
+			     scm_tc7_stringbuf | SCM_I_STRINGBUF_F_SHARED, \
+			     (scm_t_bits) (contents),			\
+                             (scm_t_bits) sizeof (contents) - 1,	\
+			     (scm_t_bits) 0)
+
+#define SCM_IMMUTABLE_STRING(c_name, contents)				\
+  SCM_IMMUTABLE_STRINGBUF (c_name ## _stringbuf, contents);		\
+  SCM_IMMUTABLE_DOUBLE_CELL (c_name,					\
+			     scm_tc7_string + 0x200,			\
+			     (scm_t_bits) &c_name ## _stringbuf_raw_cell, \
+			     (scm_t_bits) 0,				\
+			     sizeof (contents) - 1)
+
+\f
+/* Documentation.  */
 
 #ifdef SCM_MAGIC_SNARF_DOCS
 #undef SCM_ASSERT
diff --git a/libguile/strings.c b/libguile/strings.c
index 9188a0d..f1167c6 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
  * 
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -66,7 +66,7 @@
  * stringbuf.  So we have fixstrings and bigstrings...
  */
 
-#define STRINGBUF_F_SHARED      0x100
+#define STRINGBUF_F_SHARED      SCM_I_STRINGBUF_F_SHARED
 #define STRINGBUF_F_INLINE      0x200
 
 #define STRINGBUF_TAG           scm_tc7_stringbuf
diff --git a/libguile/strings.h b/libguile/strings.h
index e81ee3d..6d8bf5f 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -3,7 +3,7 @@
 #ifndef SCM_STRINGS_H
 #define SCM_STRINGS_H
 
-/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -111,6 +111,8 @@ SCM_API SCM scm_makfromstrs (int argc, char **argv);
 
 /* internal accessor functions.  Arguments must be valid. */
 
+#define SCM_I_STRINGBUF_F_SHARED      0x100
+
 SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap);
 SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end);
 SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [BDW-GC] Static cell/string/symbol allocation
  2009-01-06  0:02 [BDW-GC] Static cell/string/symbol allocation Ludovic Courtès
@ 2009-01-06  2:11 ` Ken Raeburn
  2009-01-13  0:04   ` Ludovic Courtès
  2009-01-13  0:34 ` Ludovic Courtès
  2009-01-15 23:20 ` Ludovic Courtès
  2 siblings, 1 reply; 6+ messages in thread
From: Ken Raeburn @ 2009-01-06  2:11 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guile-devel

On Jan 5, 2009, at 19:02, Ludovic Courtès wrote:
> Alas, there's no portable way that I know of to ask the compiler to
> align double cells on 8-byte boundaries so that Guile actually
> recognizes them as cells.  GCC's `aligned' attribute does the job, but
> is not portable.  So this can't be committed, unless someone comes up
> with a bright idea.  :-)

There's no portable way, but it might be a bit more likely to happen  
if you try something like:

union {
   scm_t_cell cell[2];
   double d_for_alignment;
   long long ll_for_alignment;
}

... and use &c_name##_raw_cell.cell.  Try it on a few compilers and  
see what happens to the alignment (or if scm_t_cell[2] is already  
sufficiently aligned).

It's still no guarantee, but I think most systems will align either  
double or long long strictly enough.  And you could use __aligned__  
conditionally, too, for more than just GCC.  There are alignment  
pragma directives available in a number of compilers, but using them  
with macros may not be practical.  (Unfortunately for installed  
headers you should probably use compiler predefined macro tests rather  
than autoconf tests, since two compilers could be used on one system.)

If that all fails and the compiler winds up giving you weak alignment,  
perhaps you could have something in the SCM_SNARF_INIT'ed code to  
check for that case, and make a properly-aligned copy if the original  
is not already sufficiently aligned, and update the static variable  
before using it.  Code it right and the compiler's optimizer may just  
throw away most of the code on most machines where the alignment is  
known to be correct.  Then the code works whether or not the compiler  
gives you the alignment you want, and when it does (which should be  
most of the time) you win on performance...

Ken



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [BDW-GC] Static cell/string/symbol allocation
  2009-01-06  2:11 ` Ken Raeburn
@ 2009-01-13  0:04   ` Ludovic Courtès
  2009-01-13 20:57     ` Ken Raeburn
  0 siblings, 1 reply; 6+ messages in thread
From: Ludovic Courtès @ 2009-01-13  0:04 UTC (permalink / raw)
  To: guile-devel

Hi Ken,

Thanks for your reply.

Ken Raeburn <raeburn@raeburn.org> writes:

> There's no portable way, but it might be a bit more likely to happen
> if you try something like:
>
> union {
>   scm_t_cell cell[2];
>   double d_for_alignment;
>   long long ll_for_alignment;
> }

The issue with this is that there's nothing telling us how compilers
should behave when encountering this.  Even if the underlying hardware
has a preferred alignment for these types, the compiler doesn't have to
honor it (on some RISC architectures the alignment can be mandated, and
failing to honor them would lead to SIGBUS).  So that appears to be
quite unreliable.

> It's still no guarantee, but I think most systems will align either
> double or long long strictly enough.  And you could use __aligned__
> conditionally, too, for more than just GCC.  There are alignment
> pragma directives available in a number of compilers, but using them
> with macros may not be practical.  (Unfortunately for installed
> headers you should probably use compiler predefined macro tests rather
> than autoconf tests, since two compilers could be used on one system.)

Exactly.  I'm currently opting for something along the lines of:

  #if (defined __GNUC__)
  # define SCM_ALIGNED(x)  __attribute__ ((aligned (x)))
  #elif (defined __INTEL_COMPILER)
  # define SCM_ALIGNED(x)  __declspec (align (x))
  #else
  /* Don't know how to align things.  */
  # undef SCM_ALIGNED
  #endif

... with code that just keeps using dynamic allocation when
`SCM_ALIGNED' is undefined.

Thanks,
Ludo'.





^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [BDW-GC] Static cell/string/symbol allocation
  2009-01-06  0:02 [BDW-GC] Static cell/string/symbol allocation Ludovic Courtès
  2009-01-06  2:11 ` Ken Raeburn
@ 2009-01-13  0:34 ` Ludovic Courtès
  2009-01-15 23:20 ` Ludovic Courtès
  2 siblings, 0 replies; 6+ messages in thread
From: Ludovic Courtès @ 2009-01-13  0:34 UTC (permalink / raw)
  To: guile-devel

Hi!

ludo@gnu.org (Ludovic Courtès) writes:

> I modified <snarf.h> in the BDW-GC branch to transparently have all
> `SCM_SYMBOL ()' invocations use a statically allocated stringbuf.  The
> symbol itself still has to be interned then so for simplicity the
> implementation statically allocates an immutable string and then uses
> `string->symbol' at initialization time to create an interned symbol
> (which reuses the string's stringbuf).

Here's an estimate of how much memory ends up being statically allocated
for libguile itself:

  $ cat libguile/*.c | grep -e 'SCM_\(GLOBAL_\)\?SYMBOL' | \
    sed s'/SCM_\(GLOBAL_\)\?SYMBOL[[:blank:]]*([^,]\+, "\([^"]\+\)".*$/\2/g' \
    |wc -c
  2921

This is the number of bytes of all the raw ASCII strings that make up
the symbols.

  $ cat libguile/*.c | grep -e 'SCM_\(GLOBAL_\)\?SYMBOL' | \
    sed s'/SCM_\(GLOBAL_\)\?SYMBOL[[:blank:]]*([^,]\+, "\([^"]\+\)".*$/\2/g' \
    |wc -l
  181

This is the number of symbols.  Since we have two double-cells (on a
32-bit arch, each double-cell is 4 * 4 = 16 byte-long) per symbol (one
for the stringbuf and one for the string), we end up statically
allocating 2921 + (16 * 181) = 5817 B.  OTOH, since we use
`string->symbol', we still have to allocate another 16 * 181 = 2896 B
worth of double-cells (for the symbols) so the "savings" are just
2921 B, i.e., 2921 B that are statically allocated instead of
dynamically (and they are shared, since they are read-only).  Producing
directly statically-allocated writable uninterned symbols would help
here.

We could apply the technique to the 1001 subrs (~16 KiB of
double-cells), but these still have to be initialized at run-time.  So
it looks like there isn't so much to be gained here.  Comments?

Thanks,
Ludo'.





^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [BDW-GC] Static cell/string/symbol allocation
  2009-01-13  0:04   ` Ludovic Courtès
@ 2009-01-13 20:57     ` Ken Raeburn
  0 siblings, 0 replies; 6+ messages in thread
From: Ken Raeburn @ 2009-01-13 20:57 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guile-devel

On Jan 12, 2009, at 19:04, Ludovic Courtès wrote:
>> union {
>>  scm_t_cell cell[2];
>>  double d_for_alignment;
>>  long long ll_for_alignment;
>> }
>
> The issue with this is that there's nothing telling us how compilers
> should behave when encountering this.  Even if the underlying hardware
> has a preferred alignment for these types, the compiler doesn't have to
> honor it (on some RISC architectures the alignment can be mandated, and
> failing to honor them would lead to SIGBUS).  So that appears to be
> quite unreliable.

That's true, there's no guarantee at all.  Though I do think Guile is  
probably making a bunch of other assumptions about what the compiler  
or OS will do, especially when it comes to garbage collection,  
equality checks, stuff like that.  There's a difference between what's  
guaranteed by the language specs and what's reliable in the types of  
platforms we'd care about.  For many of them, I think the above will  
cause the desired alignment; I don't know about all of them.

> ... with code that just keeps using dynamic allocation when
> `SCM_ALIGNED' is undefined.

That works too.  The compilers and platforms of greatest interest for  
the biggest part of the user community (GCC and Visual Studio, and  
then...what else?) are likely to provide some facility for this, so  
the performance gain probably only fails to be realized for a small  
part of the audience.

Ken



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [BDW-GC] Static cell/string/symbol allocation
  2009-01-06  0:02 [BDW-GC] Static cell/string/symbol allocation Ludovic Courtès
  2009-01-06  2:11 ` Ken Raeburn
  2009-01-13  0:34 ` Ludovic Courtès
@ 2009-01-15 23:20 ` Ludovic Courtès
  2 siblings, 0 replies; 6+ messages in thread
From: Ludovic Courtès @ 2009-01-15 23:20 UTC (permalink / raw)
  To: guile-devel

Hello!

ludo@gnu.org (Ludovic Courtès) writes:

> I modified <snarf.h> in the BDW-GC branch to transparently have all
> `SCM_SYMBOL ()' invocations use a statically allocated stringbuf.  The
> symbol itself still has to be interned then so for simplicity the
> implementation statically allocates an immutable string and then uses
> `string->symbol' at initialization time to create an interned symbol
> (which reuses the string's stringbuf).

FYI, I committed the code in the new `bdw-gc-static-alloc' branch.

  http://git.savannah.gnu.org/gitweb/?p=guile.git;a=shortlog;h=refs/heads/bdw-gc-static-alloc

Thanks,
Ludo'.





^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-01-15 23:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-06  0:02 [BDW-GC] Static cell/string/symbol allocation Ludovic Courtès
2009-01-06  2:11 ` Ken Raeburn
2009-01-13  0:04   ` Ludovic Courtès
2009-01-13 20:57     ` Ken Raeburn
2009-01-13  0:34 ` Ludovic Courtès
2009-01-15 23:20 ` Ludovic Courtès

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).