unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* [PATCH] Avoid malloc/free/dynwind for POSIX subrs that take strings
@ 2020-04-29 20:22 Ludovic Courtès
  2020-05-05  7:23 ` Andy Wingo
  0 siblings, 1 reply; 5+ messages in thread
From: Ludovic Courtès @ 2020-04-29 20:22 UTC (permalink / raw)
  To: guile-devel; +Cc: wingo

[-- Attachment #1: Type: text/plain, Size: 1619 bytes --]

Hello Guilers!

As discussed on IRC, the patch below arranges so that subrs that take
strings and pass them to syscall wrappers can avoid the
malloc/free/dynwind overhead.  This gives a 10% speedup on a tight loop
that calls these subrs:

--8<---------------cut here---------------start------------->8---
$ cat /tmp/tt.scm
(let loop ((i 2000000))
  (unless (zero? i)
    (lstat "/var/run/")
    (readlink "/bin/sh")
    (loop (1- i))))
$ time ./meta/guile /tmp/tt.scm

real	0m7.186s
user	0m5.446s
sys	0m2.499s
$ time guile /tmp/tt.scm

real	0m7.857s
user	0m6.123s
sys	0m2.460s
--8<---------------cut here---------------end--------------->8---

(It’s crazy that two thirds of the wall-clock time is user!)

On IRC, Andy mentioned concerns that the SCM could disappear and thus,
our the internal pointer returned by ‘scm_locale_string_data’ wouldn’t
be enough to prevent the stringbuf from being GC’d.

If we comment out the pure-ASCII optimization in
‘scm_locale_string_data’, we still get a 5% speedup or so.

Anyhow, we get the other benefit, which is that it simplifies code:

 dynl.c    |   14 +++-------
 filesys.c |   84 +++++++++++++++++++++++++-------------------------------------
 foreign.c |   10 ++-----
 fports.c  |   21 +++++----------
 gettext.c |   73 ++++++++++++++---------------------------------------
 load.c    |   24 +++++------------
 net_db.c  |   38 +++++++---------------------
 strings.c |   46 +++++++++++++++++++++++++++++++++
 strings.h |    3 +-
 9 files changed, 135 insertions(+), 178 deletions(-)

Thoughts?

Ludo’.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: Type: text/x-patch, Size: 23899 bytes --]

diff --git a/libguile/dynl.c b/libguile/dynl.c
index e9c03e95b..6b7575b4a 100644
--- a/libguile/dynl.c
+++ b/libguile/dynl.c
@@ -1,6 +1,6 @@
 /* dynl.c - dynamic linking
 
-   Copyright 1990-2003,2008-2011,2017-2018
+   Copyright 1990-2003,2008-2011,2017-2018,2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -233,7 +233,7 @@ SCM_DEFINE (scm_dynamic_link, "dynamic-link", 0, 1, 0,
 #define FUNC_NAME s_scm_dynamic_link
 {
   void *handle;
-  char *file;
+  const char *file;
 
   scm_dynwind_begin (0);
   scm_i_dynwind_pthread_mutex_lock (&ltdl_lock);
@@ -241,10 +241,7 @@ SCM_DEFINE (scm_dynamic_link, "dynamic-link", 0, 1, 0,
   if (SCM_UNBNDP (filename))
     file = NULL;
   else
-    {
-      file = scm_to_locale_string (filename);
-      scm_dynwind_free (file);
-    }
+    file = scm_locale_string_data (filename);
 
   handle = sysdep_dynl_link (file, FUNC_NAME);
   scm_dynwind_end ();
@@ -315,12 +312,11 @@ SCM_DEFINE (scm_dynamic_pointer, "dynamic-pointer", 2, 0, 0,
     SCM_MISC_ERROR ("Already unlinked: ~S", dobj);
   else
     {
-      char *chars;
+      const char *chars;
 
       scm_dynwind_begin (0);
       scm_i_dynwind_pthread_mutex_lock (&ltdl_lock);
-      chars = scm_to_locale_string (name);
-      scm_dynwind_free (chars);
+      chars = scm_locale_string_data (name);
       val = sysdep_dynl_value (chars, DYNL_HANDLE (dobj), FUNC_NAME);
       scm_dynwind_end ();
 
diff --git a/libguile/filesys.c b/libguile/filesys.c
index 4f7115397..8406bf831 100644
--- a/libguile/filesys.c
+++ b/libguile/filesys.c
@@ -1,4 +1,4 @@
-/* Copyright 1996-2002,2004,2006,2009-2019
+/* Copyright 1996-2002,2004,2006,2009-2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -118,25 +118,22 @@
 
 /* Two helper macros for an often used pattern */
 
-#define STRING_SYSCALL(str,cstr,code)        \
-  do {                                       \
-    int eno;                                 \
-    char *cstr = scm_to_locale_string (str); \
-    SCM_SYSCALL (code);                      \
-    eno = errno; free (cstr); errno = eno;   \
+#define STRING_SYSCALL(str,cstr,code)                   \
+  do {                                                  \
+    int eno;                                            \
+    const char *cstr = scm_locale_string_data (str);    \
+    SCM_SYSCALL (code);                                 \
+    eno = errno; errno = eno;                           \
   } while (0)
 
-#define STRING2_SYSCALL(str1,cstr1,str2,cstr2,code)  \
-  do {                                               \
-    int eno;                                         \
-    char *cstr1, *cstr2;                             \
-    scm_dynwind_begin (0);                             \
-    cstr1 = scm_to_locale_string (str1);             \
-    scm_dynwind_free (cstr1);                          \
-    cstr2 = scm_to_locale_string (str2);             \
-    scm_dynwind_free (cstr2);                          \
-    SCM_SYSCALL (code);                              \
-    eno = errno; scm_dynwind_end (); errno = eno;      \
+#define STRING2_SYSCALL(str1,cstr1,str2,cstr2,code)     \
+  do {                                                  \
+    int eno;                                            \
+    const char *cstr1, *cstr2;                          \
+    cstr1 = scm_locale_string_data (str1);              \
+    cstr2 = scm_locale_string_data (str2);              \
+    SCM_SYSCALL (code);                                 \
+    eno = errno; errno = eno;                           \
   } while (0)
 
 
@@ -536,9 +533,8 @@ SCM_DEFINE (scm_stat, "stat", 1, 1, 0,
     }
   else if (scm_is_string (object))
     {
-      char *file = scm_to_locale_string (object);
+      const char *file = scm_locale_string_data (object);
       SCM_SYSCALL (rv = stat_or_stat64 (file, &stat_temp));
-      free (file);
     }
   else
     {
@@ -1038,12 +1034,9 @@ SCM_DEFINE (scm_readlink, "readlink", 1, 0, 0,
   int size = 100;
   char *buf;
   SCM result;
-  char *c_path;
-  
-  scm_dynwind_begin (0);
+  const char *c_path;
 
-  c_path = scm_to_locale_string (path);
-  scm_dynwind_free (c_path);
+  c_path = scm_locale_string_data (path);
 
   buf = scm_malloc (size);
 
@@ -1062,7 +1055,6 @@ SCM_DEFINE (scm_readlink, "readlink", 1, 0, 0,
     }
   result = scm_take_locale_stringn (buf, rv);
 
-  scm_dynwind_end ();
   return result;
 }
 #undef FUNC_NAME
@@ -1073,18 +1065,14 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0,
 	    "The return value is unspecified.")
 #define FUNC_NAME s_scm_copy_file
 {
-  char *c_oldfile, *c_newfile;
+  const char *c_oldfile, *c_newfile;
   int oldfd, newfd;
   int n, rv;
   char buf[BUFSIZ];
   struct stat_or_stat64 oldstat;
 
-  scm_dynwind_begin (0);
-  
-  c_oldfile = scm_to_locale_string (oldfile);
-  scm_dynwind_free (c_oldfile);
-  c_newfile = scm_to_locale_string (newfile);
-  scm_dynwind_free (c_newfile);
+  c_oldfile = scm_locale_string_data (oldfile);
+  c_newfile = scm_locale_string_data (newfile);
 
   oldfd = open_or_open64 (c_oldfile, O_RDONLY | O_BINARY);
   if (oldfd == -1)
@@ -1115,7 +1103,6 @@ SCM_DEFINE (scm_copy_file, "copy-file", 2, 0, 0,
   if (close (newfd) == -1)
     SCM_SYSERROR;
 
-  scm_dynwind_end ();
   return SCM_UNSPECIFIED;
 }
 #undef FUNC_NAME
@@ -1384,11 +1371,10 @@ SCM_DEFINE (scm_access, "access?", 2, 0, 0,
 #define FUNC_NAME s_scm_access
 {
   int rv;
-  char *c_path;
+  const char *c_path;
 
-  c_path = scm_to_locale_string (path);
+  c_path = scm_locale_string_data (path);
   rv = access (c_path, scm_to_int (how));
-  free (c_path);
 
   return scm_from_bool (!rv);
 }
@@ -1643,14 +1629,14 @@ SCM_DEFINE (scm_canonicalize_path, "canonicalize-path", 1, 0, 0,
             "Raises an error if any component of @var{path} does not exist.")
 #define FUNC_NAME s_scm_canonicalize_path
 {
-  char *str, *canon;
-  
+  char *canon;
+  const char *str;
+
   SCM_VALIDATE_STRING (1, path);
 
-  str = scm_to_locale_string (path);
+  str = scm_locale_string_data (path);
   canon = canonicalize_file_name (str);
-  free (str);
-  
+
   if (canon)
     return scm_take_locale_string (canon);
   else
@@ -1664,11 +1650,11 @@ scm_i_relativize_path (SCM path, SCM in_path)
   SCM scanon;
   
   {
-    char *str, *canon;
+    char *canon;
+    const char *str;
 
-    str = scm_to_locale_string (path);
+    str = scm_locale_string_data (path);
     canon = canonicalize_file_name (str);
-    free (str);
 
     if (!canon)
       return SCM_BOOL_F;
@@ -1683,12 +1669,12 @@ scm_i_relativize_path (SCM path, SCM in_path)
 
       /* Try to canonicalize DIR, since we have canonicalized PATH.  */
       {
-        char *str, *canon;
+        char *canon;
+        const char *str;
 
-        str = scm_to_locale_string (dir);
+        str = scm_locale_string_data (dir);
         canon = canonicalize_file_name (str);
-        free (str);
-  
+
         if (canon)
           dir = scm_from_locale_string (canon);
         free (canon);
diff --git a/libguile/foreign.c b/libguile/foreign.c
index 1368cc9da..e5724e8bb 100644
--- a/libguile/foreign.c
+++ b/libguile/foreign.c
@@ -1,4 +1,4 @@
-/* Copyright 2010-2016,2018
+/* Copyright 2010-2016,2018,2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -386,22 +386,18 @@ SCM_DEFINE (scm_string_to_pointer, "string->pointer", 1, 1, 0,
     return scm_from_pointer (scm_to_locale_string (string), free);
   else
     {
-      char *enc;
+      const char *enc;
       SCM ret;
       
       SCM_VALIDATE_STRING (2, encoding);
 
-      enc = scm_to_locale_string (encoding);
-      scm_dynwind_begin (0);
-      scm_dynwind_free (enc);
+      enc = scm_locale_string_data (encoding);
 
       ret = scm_from_pointer
         (scm_to_stringn (string, NULL, enc,
                          scm_i_default_string_failed_conversion_handler ()),
          free);
 
-      scm_dynwind_end ();
-
       return ret;
     }
 }
diff --git a/libguile/fports.c b/libguile/fports.c
index 6019d9ec3..4aac06edf 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -1,4 +1,4 @@
-/* Copyright 1995-2004,2006-2015,2017-2019
+/* Copyright 1995-2004,2006-2015,2017-2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -151,12 +151,12 @@ fport_canonicalize_filename (SCM filename)
     }
   else if (scm_is_eq (mode, sym_absolute))
     {
-      char *str, *canon;
-  
-      str = scm_to_locale_string (filename);
+      char *canon;
+      const char *str;
+
+      str = scm_locale_string_data (filename);
       canon = canonicalize_file_name (str);
-      free (str);
-  
+
       return canon ? scm_take_locale_string (canon) : filename;
     }
   else
@@ -243,16 +243,13 @@ scm_open_file_with_encoding (SCM filename, SCM mode,
   SCM port;
   int fdes, flags, binary = 0;
   unsigned int retries;
-  char *file;
+  const char *file;
 
   if (SCM_UNLIKELY (!(scm_is_false (encoding) || scm_is_string (encoding))))
     scm_wrong_type_arg_msg (FUNC_NAME, 0, encoding,
                             "encoding to be string or false");
 
-  scm_dynwind_begin (0);
-
-  file = scm_to_locale_string (filename);
-  scm_dynwind_free (file);
+  file = scm_locale_string_data (filename);
 
   flags = scm_i_mode_to_open_flags (mode, &binary, FUNC_NAME);
 
@@ -321,8 +318,6 @@ scm_open_file_with_encoding (SCM filename, SCM mode,
         scm_i_set_port_encoding_x (port, enc);
     }
 
-  scm_dynwind_end ();
-
   return port;
 }
 #undef FUNC_NAME
diff --git a/libguile/gettext.c b/libguile/gettext.c
index b9af4d313..0a25a9cbf 100644
--- a/libguile/gettext.c
+++ b/libguile/gettext.c
@@ -1,4 +1,4 @@
-/* Copyright 2004,2006,2018
+/* Copyright 2004,2006,2018,2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -94,14 +94,11 @@ SCM_DEFINE (scm_gettext, "gettext", 1, 2, 0,
 	    "and defaults to LC_MESSAGES.")
 #define FUNC_NAME s_scm_gettext
 {
-  char *c_msgid;
+  const char *c_msgid;
   char const *c_result;
   SCM result;
 
-  scm_dynwind_begin (0);
-
-  c_msgid = scm_to_locale_string (msgid);
-  scm_dynwind_free (c_msgid);
+  c_msgid = scm_locale_string_data (msgid);
 
   if (SCM_UNBNDP (domain))
     {
@@ -110,10 +107,9 @@ SCM_DEFINE (scm_gettext, "gettext", 1, 2, 0,
     }
   else
     {
-      char *c_domain;
+      const char *c_domain;
 
-      c_domain = scm_to_locale_string (domain);
-      scm_dynwind_free (c_domain);
+      c_domain = scm_locale_string_data (domain);
 
       if (SCM_UNBNDP (category))
 	{
@@ -135,7 +131,6 @@ SCM_DEFINE (scm_gettext, "gettext", 1, 2, 0,
   else
     result = scm_from_locale_string (c_result);
 
-  scm_dynwind_end ();
   return result;
 }
 #undef FUNC_NAME
@@ -150,19 +145,13 @@ SCM_DEFINE (scm_ngettext, "ngettext", 3, 2, 0,
 	    "@var{category} is optional and defaults to LC_MESSAGES.")
 #define FUNC_NAME s_scm_ngettext
 {
-  char *c_msgid;
-  char *c_msgid_plural;
+  const char *c_msgid, *c_msgid_plural;
   unsigned long c_n;
   const char *c_result;
   SCM result;
 
-  scm_dynwind_begin (0);
-
-  c_msgid = scm_to_locale_string (msgid);
-  scm_dynwind_free (c_msgid);
-
-  c_msgid_plural = scm_to_locale_string (msgid_plural);
-  scm_dynwind_free (c_msgid_plural);
+  c_msgid = scm_locale_string_data (msgid);
+  c_msgid_plural = scm_locale_string_data (msgid_plural);
 
   c_n = scm_to_ulong (n);
 
@@ -173,10 +162,9 @@ SCM_DEFINE (scm_ngettext, "ngettext", 3, 2, 0,
     }
   else
     {
-      char *c_domain;
+      const char *c_domain;
 
-      c_domain = scm_to_locale_string (domain);
-      scm_dynwind_free (c_domain);
+      c_domain = scm_locale_string_data (domain);
 
       if (SCM_UNBNDP (category))
 	{
@@ -200,8 +188,7 @@ SCM_DEFINE (scm_ngettext, "ngettext", 3, 2, 0,
     result = msgid_plural;
   else
     result = scm_from_locale_string (c_result);
-  
-  scm_dynwind_end ();
+
   return result;
 }
 #undef FUNC_NAME
@@ -214,18 +201,13 @@ SCM_DEFINE (scm_textdomain, "textdomain", 0, 1, 0,
 #define FUNC_NAME s_scm_textdomain
 {
   char const *c_result;
-  char *c_domain;
+  const char *c_domain;
   SCM result = SCM_BOOL_F;
 
-  scm_dynwind_begin (0);
-
   if (SCM_UNBNDP (domainname))
     c_domain = NULL;
   else
-    {
-      c_domain = scm_to_locale_string (domainname);
-      scm_dynwind_free (c_domain);
-    }
+    c_domain = scm_locale_string_data (domainname);
 
   c_result = textdomain (c_domain);
   if (c_result != NULL)
@@ -233,7 +215,6 @@ SCM_DEFINE (scm_textdomain, "textdomain", 0, 1, 0,
   else if (!SCM_UNBNDP (domainname))
     SCM_SYSERROR;
 
-  scm_dynwind_end ();
   return result;
 }
 #undef FUNC_NAME
@@ -245,23 +226,16 @@ SCM_DEFINE (scm_bindtextdomain, "bindtextdomain", 1, 1, 0,
 	    "Return the directory bound to @var{domainname}.")
 #define FUNC_NAME s_scm_bindtextdomain
 {
-  char *c_domain;
-  char *c_directory;
+  const char *c_domain, *c_directory;
   char const *c_result;
   SCM result;
 
-  scm_dynwind_begin (0);
-
   if (SCM_UNBNDP (directory))
     c_directory = NULL;
   else
-    {
-      c_directory = scm_to_locale_string (directory);
-      scm_dynwind_free (c_directory);
-    }
+    c_directory = scm_locale_string_data (directory);
 
-  c_domain = scm_to_locale_string (domainname);
-  scm_dynwind_free (c_domain);
+  c_domain = scm_locale_string_data (domainname);
 
   c_result = bindtextdomain (c_domain, c_directory);
 
@@ -272,7 +246,6 @@ SCM_DEFINE (scm_bindtextdomain, "bindtextdomain", 1, 1, 0,
   else
     result = SCM_BOOL_F;
 
-  scm_dynwind_end ();
   return result;
 }
 #undef FUNC_NAME
@@ -284,23 +257,16 @@ SCM_DEFINE (scm_bind_textdomain_codeset, "bind-textdomain-codeset", 1, 1, 0,
 	    "Return the encoding of @var{domainname}.")
 #define FUNC_NAME s_scm_bind_textdomain_codeset
 {
-  char *c_domain;
-  char *c_encoding;
+  const char *c_domain, *c_encoding;
   char const *c_result;
   SCM result;
 
-  scm_dynwind_begin (0);
-
   if (SCM_UNBNDP (encoding))
     c_encoding = NULL;
   else
-    {
-      c_encoding = scm_to_locale_string (encoding);
-      scm_dynwind_free (c_encoding);
-    }
+    c_encoding = scm_locale_string_data (encoding);
 
-  c_domain = scm_to_locale_string (domainname);
-  scm_dynwind_free (c_domain);
+  c_domain = scm_locale_string_data (domainname);
 
   c_result = bind_textdomain_codeset (c_domain, c_encoding);
 
@@ -311,7 +277,6 @@ SCM_DEFINE (scm_bind_textdomain_codeset, "bind-textdomain-codeset", 1, 1, 0,
   else
     result = SCM_BOOL_F;
 
-  scm_dynwind_end ();
   return result;
 }
 #undef FUNC_NAME
diff --git a/libguile/load.c b/libguile/load.c
index e95c36db1..04fedc6e2 100644
--- a/libguile/load.c
+++ b/libguile/load.c
@@ -1,4 +1,4 @@
-/* Copyright 1995-1996,1998-2001,2004,2006,2008-2019
+/* Copyright 1995-1996,1998-2001,2004,2006,2008-2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -460,7 +460,7 @@ stringbuf_cat_locale_string (struct stringbuf *buf, SCM str)
 }
 
 static void
-stringbuf_cat (struct stringbuf *buf, char *str)
+stringbuf_cat (struct stringbuf *buf, const char *str)
 {
   size_t max_len = buf->buf_len - (buf->ptr - buf->buf) - 1;
   size_t len = strlen (str);
@@ -648,7 +648,7 @@ load_thunk_from_path (SCM filename, SCM source_file_name,
 {
   struct stringbuf buf;
   struct stat stat_buf;
-  char *filename_chars;
+  const char *filename_chars;
   size_t filename_len;
   SCM path, extensions;
   SCM result = SCM_BOOL_F;
@@ -664,11 +664,8 @@ load_thunk_from_path (SCM filename, SCM source_file_name,
     scm_misc_error ("%search-path", "bad extensions list: ~a",
                     scm_list_1 (extensions));
 
-  scm_dynwind_begin (0);
-
-  filename_chars = scm_to_locale_string (filename);
+  filename_chars = scm_locale_string_data (filename);
   filename_len = strlen (filename_chars);
-  scm_dynwind_free (filename_chars);
 
   /* If FILENAME is absolute and is still valid, return it unchanged.  */
   if (is_absolute_file_name (filename))
@@ -682,7 +679,7 @@ load_thunk_from_path (SCM filename, SCM source_file_name,
 
   /* If FILENAME has an extension, don't try to add EXTENSIONS to it.  */
   {
-    char *endp;
+    const char *endp;
 
     for (endp = filename_chars + filename_len - 1;
 	 endp >= filename_chars;
@@ -787,7 +784,6 @@ load_thunk_from_path (SCM filename, SCM source_file_name,
     scm_wrong_type_arg_msg (NULL, 0, path, "proper list");
 
  end:
-  scm_dynwind_end ();
   return result;
 }
 
@@ -804,7 +800,7 @@ search_path (SCM path, SCM filename, SCM extensions, SCM require_exts,
              struct stat *stat_buf)
 {
   struct stringbuf buf;
-  char *filename_chars;
+  const char *filename_chars;
   size_t filename_len;
   SCM result = SCM_BOOL_F;
   char initial_buffer[256];
@@ -816,11 +812,8 @@ search_path (SCM path, SCM filename, SCM extensions, SCM require_exts,
     scm_misc_error ("%search-path", "bad extensions list: ~a",
                     scm_list_1 (extensions));
 
-  scm_dynwind_begin (0);
-
-  filename_chars = scm_to_locale_string (filename);
+  filename_chars = scm_locale_string_data (filename);
   filename_len = strlen (filename_chars);
-  scm_dynwind_free (filename_chars);
 
   /* If FILENAME is absolute and is still valid, return it unchanged.  */
   if (is_absolute_file_name (filename))
@@ -835,7 +828,7 @@ search_path (SCM path, SCM filename, SCM extensions, SCM require_exts,
 
   /* If FILENAME has an extension, don't try to add EXTENSIONS to it.  */
   {
-    char *endp;
+    const char *endp;
 
     for (endp = filename_chars + filename_len - 1;
 	 endp >= filename_chars;
@@ -918,7 +911,6 @@ search_path (SCM path, SCM filename, SCM extensions, SCM require_exts,
     scm_wrong_type_arg_msg (NULL, 0, path, "proper list");
 
  end:
-  scm_dynwind_end ();
   return result;
 }
 
diff --git a/libguile/net_db.c b/libguile/net_db.c
index dfb61e8d0..f4748a55b 100644
--- a/libguile/net_db.c
+++ b/libguile/net_db.c
@@ -1,5 +1,5 @@
 /* "net_db.c" network database support
-   Copyright 1995-2001,2006,2009-2013,2018
+   Copyright 1995-2001,2006,2009-2013,2018,2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -165,9 +165,8 @@ SCM_DEFINE (scm_gethost, "gethost", 0, 1, 0,
     }
   else if (scm_is_string (host))
     {
-      char *str = scm_to_locale_string (host);
+      const char *str = scm_locale_string_data (host);
       entry = gethostbyname (str);
-      free (str);
     }
   else
     {
@@ -239,10 +238,9 @@ SCM_DEFINE (scm_getnet, "getnet", 0, 1, 0,
     }
   else if (scm_is_string (net))
     {
-      char *str = scm_to_locale_string (net);
+      const char *str = scm_locale_string_data (net);
       entry = getnetbyname (str);
       eno = errno;
-      free (str);
     }
   else
     {
@@ -291,10 +289,9 @@ SCM_DEFINE (scm_getproto, "getproto", 0, 1, 0,
     }
   else if (scm_is_string (protocol))
     {
-      char *str = scm_to_locale_string (protocol);
+      const char *str = scm_locale_string_data (protocol);
       entry = getprotobyname (str);
       eno = errno;
-      free (str);
     }
   else
     {
@@ -341,7 +338,7 @@ SCM_DEFINE (scm_getserv, "getserv", 0, 2, 0,
 #define FUNC_NAME s_scm_getserv
 {
   struct servent *entry;
-  char *protoname;
+  const char *protoname;
   int eno;
 
   if (SCM_UNBNDP (name))
@@ -357,17 +354,13 @@ SCM_DEFINE (scm_getserv, "getserv", 0, 2, 0,
       return scm_return_entry (entry);
     }
 
-  scm_dynwind_begin (0);
-
-  protoname = scm_to_locale_string (protocol);
-  scm_dynwind_free (protoname);
+  protoname = scm_locale_string_data (protocol);
 
   if (scm_is_string (name))
     {
-      char *str = scm_to_locale_string (name);
+      const char *str = scm_locale_string_data (name);
       entry = getservbyname (str, protoname);
       eno = errno;
-      free (str);
     }
   else
     {
@@ -378,7 +371,6 @@ SCM_DEFINE (scm_getserv, "getserv", 0, 2, 0,
   if (!entry)
     SCM_SYSERROR_MSG("no such service ~A", scm_list_1 (name), eno);
 
-  scm_dynwind_end ();
   return scm_return_entry (entry);
 }
 #undef FUNC_NAME
@@ -607,7 +599,7 @@ SCM_DEFINE (scm_getaddrinfo, "getaddrinfo", 1, 5, 0,
 #define FUNC_NAME s_scm_getaddrinfo
 {
   int err;
-  char *c_name, *c_service;
+  const char *c_name, *c_service;
   struct addrinfo c_hints, *c_result;
   SCM result = SCM_EOL;
 
@@ -617,21 +609,13 @@ SCM_DEFINE (scm_getaddrinfo, "getaddrinfo", 1, 5, 0,
   if (!SCM_UNBNDP (service) && scm_is_true (service))
     SCM_VALIDATE_STRING (SCM_ARG2, service);
 
-  scm_dynwind_begin (0);
-
   if (scm_is_string (name))
-    {
-      c_name = scm_to_locale_string (name);
-      scm_dynwind_free (c_name);
-    }
+    c_name = scm_locale_string_data (name);
   else
     c_name = NULL;
 
   if (scm_is_string (service))
-    {
-      c_service = scm_to_locale_string (service);
-      scm_dynwind_free (c_service);
-    }
+    c_service = scm_locale_string_data (service);
   else
     c_service = NULL;
 
@@ -667,8 +651,6 @@ SCM_DEFINE (scm_getaddrinfo, "getaddrinfo", 1, 5, 0,
   else
     scm_throw (sym_getaddrinfo_error, scm_list_1 (scm_from_int (err)));
 
-  scm_dynwind_end ();
-
   return result;
 }
 #undef FUNC_NAME
diff --git a/libguile/strings.c b/libguile/strings.c
index aab104498..eca410518 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1,4 +1,4 @@
-/* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2016,2018-2019
+/* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2016,2018-2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -580,6 +580,50 @@ scm_i_string_data (SCM str)
   return data;
 }
 
+/* Return true if the LEN bytes at STR consist of pure ASCII.  */
+static int
+is_ascii (const char *str, size_t len)
+{
+  for (size_t i = 0; i < len; i++)
+    {
+      if (str[i] > 127)
+        return 0;
+    }
+
+  return 1;
+}
+
+/* Return a C null-terminated string in locale encoding corresponding to
+   STR.  When possible, avoid heap allocation; in other cases, return a
+   pointer to GC-managed memory.
+
+   Note: The returned pointer can be an interior pointer in the
+   stringbuf of STR.  Thus, the caller must ensure that STR is live
+   while it accesses the returned pointer.  */
+const char *
+scm_locale_string_data (SCM str)
+{
+  const void *data = scm_i_string_data (str);
+  size_t len = scm_i_string_length (str);
+
+  /* Per <https://pubs.opengroup.org/onlinepubs/007908799/xbd/charset.html>,
+     locale encodings must be a superset of the "portable character
+     set", essentially ASCII.  When STR is ASCII and nul-terminated,
+     return its pointer right away.  */
+  if (SCM_LIKELY (scm_i_is_narrow_string (str)
+                  && ((char *) data)[len] == '\0'
+                  && len < 1024       /* because 'is_ascii' is linear */
+                  && is_ascii (data, len)))
+    return data;
+
+  len *= scm_i_is_narrow_string (str) ? 1 : 4;
+  char *result = scm_gc_malloc_pointerless (len + 1, "string");
+  memcpy (result, data, len);
+  result[len] = '\0';
+
+  return result;
+}
+
 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
    STR.  */
 const char *
diff --git a/libguile/strings.h b/libguile/strings.h
index 3f92d8c89..938b13c37 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -1,7 +1,7 @@
 #ifndef SCM_STRINGS_H
 #define SCM_STRINGS_H
 
-/* Copyright 1995-1998,2000-2001,2004-2006,2008-2011,2013,2015-2019
+/* Copyright 1995-1998,2000-2001,2004-2006,2008-2011,2013,2015-2020
      Free Software Foundation, Inc.
 
    This file is part of Guile.
@@ -239,6 +239,7 @@ SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str);
 SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str);
 SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
 SCM_INTERNAL const void *scm_i_string_data (SCM str);
+SCM_INTERNAL const char *scm_locale_string_data (SCM str);
 
 SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
 SCM_INTERNAL void scm_i_string_stop_writing (void);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-05-22 20:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-29 20:22 [PATCH] Avoid malloc/free/dynwind for POSIX subrs that take strings Ludovic Courtès
2020-05-05  7:23 ` Andy Wingo
2020-05-17 21:46   ` Ludovic Courtès
2020-05-18  6:50     ` Andy Wingo
2020-05-22 20:47       ` Ludovic Courtès

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).