unofficial mirror of bug-guile@gnu.org 
 help / color / mirror / Atom feed
From: Matt Wette <matt.wette@gmail.com>
To: 27782@debbugs.gnu.org
Cc: "Ludovic Courtès" <ludo@gnu.org>, matt.wette@gmail.com
Subject: bug#27782: new patch for mma
Date: Sat, 4 Jul 2020 12:40:19 -0700	[thread overview]
Message-ID: <dbe12707-2a97-db9a-08f6-677b843021c1@gmail.com> (raw)
In-Reply-To: <CD68D357-0F0F-471C-BEC1-EB601844A04E@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 740 bytes --]

Attached is a patch against guile master (at 3.0.4),
commit 5e1748f75128107e3a0707b66df5adb95d98437e

It is an incomplete, but functional, implementation of an mmap API, including
1) mmap : low-level mmap, returns a bytevector, not searched for roots
2) mmap/search : like mmap, but not marked w/ GC_exclude_static_roots
3) mmap-file: high-level, easy-to-use mmap (e.g., (mmap-file "foo.dat"))

The above are coded in libguile/filesys.[ch].

Also included is test-suite/tests/mmap-api.test.

Build:
$ ./configure --enable-mmap-api
$ make
$ make check
...
Running mmap-api.test
...

Since implementing mmap may not be simple, I propose that a
git branch (e.g., wip-mmap-api) be created to invite group review,
updates, and testing of those updates.

Matt



[-- Attachment #2: mmap-api-branch.patch --]
[-- Type: text/x-patch, Size: 13760 bytes --]

diff --git a/configure.ac b/configure.ac
index 3e96094f6..382d7d528 100644
--- a/configure.ac
+++ b/configure.ac
@@ -170,6 +170,10 @@ AC_ARG_ENABLE(tmpnam,
   AS_HELP_STRING([--disable-tmpnam],[omit POSIX tmpnam]),,
   enable_tmpnam=yes)
 
+AC_ARG_ENABLE(mmap-api,
+  AS_HELP_STRING([--enable-mmap-api],[enable MMAP interface]),,
+  enable_mmap_api=no)
+
 AC_ARG_ENABLE([deprecated],
   AS_HELP_STRING([--disable-deprecated],[omit deprecated features]))
 
@@ -917,6 +921,10 @@ if test "$enable_tmpnam" = yes; then
    AC_DEFINE([ENABLE_TMPNAM], 1, [Define when tmpnam support is enabled.])
 fi
 
+if test "$enable_mmap_api" = yes; then
+   AC_DEFINE([ENABLE_MMAP_API], 1, [Define when MMAP API is enabled.])
+fi
+
 AC_REPLACE_FUNCS([strerror memmove])
 
 # Reasons for testing:
diff --git a/libguile/filesys.c b/libguile/filesys.c
index 39bfd38cc..04e5dfd4d 100644
--- a/libguile/filesys.c
+++ b/libguile/filesys.c
@@ -79,11 +79,22 @@
 # include <sys/sendfile.h>
 #endif
 
+#ifdef ENABLE_MMAP_API
+#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_MAP_ANONYMOUS)
+#  include <sys/mman.h>
+#  include <sys/stat.h>
+#  include <errno.h>
+#endif
+#endif
+
 #include "async.h"
 #include "boolean.h"
+#include "bytevectors.h"                /* mmap */
 #include "dynwind.h"
 #include "fdes-finalizers.h"
 #include "feature.h"
+#include "finalizers.h"                 /* mmap */
+#include "foreign.h"                    /* mmap */
 #include "fports.h"
 #include "gsubr.h"
 #include "iselect.h"
@@ -1880,6 +1891,314 @@ scm_dir_free (SCM p)
 
 \f
 
+#ifdef ENABLE_MMAP_API
+#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_MAP_ANONYMOUS)
+
+/* FIXME:
+ * rlb says add msync()
+ * Windows: look for MapViewOfFile
+ */
+
+/* Map PROT to open(2) flags.  Only an unbound PROT is handled so far and
+   yields DEF; any bound value raises a "bad prot option" misc-error
+   (string and integer forms are not implemented yet).  */
+static int
+mm_flags (SCM prot, int def)
+{
+  if (!SCM_UNBNDP (prot))
+    scm_misc_error("mmap", "bad prot option", SCM_EOL);
+  return def;
+}
+
+/* Map PROT to mmap(2) protection bits: DEF when PROT is unbound,
+   otherwise a "bad prot option" misc-error is raised.  */
+static int
+mm_prot (SCM prot, int def)
+{
+  if (!SCM_UNBNDP (prot))
+    scm_misc_error("mmap", "bad prot option", SCM_EOL);
+  return def;
+}
+
+/* Finalizer for mmap-backed bytevectors.  PTR is the bytevector object and
+   DATA carries the mapping length as a pointer-sized integer.  */
+static void
+mmap_finalizer (void *ptr, void *data)
+{
+  SCM bvec = SCM_PACK_POINTER (ptr);
+  void *c_addr;
+
+  if (!SCM_BYTEVECTOR_P (bvec))
+    abort();
+  /* The munmap procedure resets the contents to NULL once it has released
+     the mapping; in that case there is nothing left to unmap here.  */
+  c_addr = SCM_BYTEVECTOR_CONTENTS (bvec);
+  if (c_addr == NULL)
+    return;
+  if (munmap (c_addr, (size_t) data) != 0)
+    scm_misc_error ("mmap", "failed to munmap memory", SCM_EOL);
+}
+
+SCM_DEFINE (scm_mmap_search, "mmap/search", 2, 4, 0,
+            (SCM addr, SCM len, SCM prot, SCM flags, SCM fd, SCM offset),
+	    "mmap addr len [prot [flags [fd [offset]]]]\n"
+	    "See the unix man page for mmap.  Returns a bytevector.  "
+	    "Note that the region allocated will be searched by the garbage "
+	    "collector for pointers. \n"
+	    "Defaults:\n"
+	    "  prot   (logior PROT_READ PROT_WRITE)\n"
+	    "  flags  (logior MAP_ANON MAP_PRIVATE)\n"
+	    "  fd     -1\n"
+	    "  offset 0\n"
+	    "E.g., @code{(define reg (mmap/search %null-pointer #x1000))}\n")
+#define FUNC_NAME s_scm_mmap_search
+{
+  void *c_mem, *c_addr;
+  size_t c_len;
+  int c_prot, c_flags, c_fd;
+  scm_t_off c_offset;
+  SCM pointer, bvec;
+
+  /* ADDR is the placement hint: a foreign pointer or an integer.  */
+  if (SCM_POINTER_P (addr))
+    c_addr = SCM_POINTER_VALUE (addr);
+  else if (scm_is_integer (addr))
+    c_addr = (void*) scm_to_uintptr_t (addr);
+  else
+    SCM_MISC_ERROR ("bad addr: ~S", scm_list_1 (addr));
+
+  c_len = scm_to_size_t (len);
+
+  /* Every optional argument is checked for its own presence, so FD may
+     be supplied without OFFSET.  Defaults are those in the docstring:
+     read/write, anonymous private mapping, no file, offset 0.  */
+  if (SCM_UNBNDP (prot))
+    c_prot = PROT_READ | PROT_WRITE;
+  else
+    c_prot = scm_to_int (prot);
+
+  if (SCM_UNBNDP (flags))
+    c_flags = MAP_ANON | MAP_PRIVATE;
+  else
+    c_flags = scm_to_int (flags);
+
+  c_fd = SCM_UNBNDP (fd) ? -1 : scm_to_int (fd);
+  c_offset = SCM_UNBNDP (offset) ? 0 : scm_to_off_t (offset);
+
+  c_mem = mmap (c_addr, c_len, c_prot, c_flags, c_fd, c_offset);
+  if (c_mem == MAP_FAILED)
+    SCM_SYSERROR;			/* errno set */
+
+  /* mmap applies OFFSET to the file, so the mapped bytes start at C_MEM
+     itself.  The bytevector contents are also the address later handed
+     to munmap by the finalizer, so they must be the mapping's base.  */
+  pointer = scm_cell (scm_tc7_pointer, (scm_t_bits) c_mem);
+  bvec = scm_c_take_typed_bytevector ((signed char *) c_mem, c_len,
+				     SCM_ARRAY_ELEMENT_TYPE_VU8, pointer);
+  assert(sizeof(size_t) <= sizeof(void*));  /* length travels as a void*  */
+  scm_i_set_finalizer (SCM2PTR (bvec), mmap_finalizer, (void*) c_len);
+  return bvec;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_mmap, "mmap", 2, 4, 0,
+            (SCM addr, SCM len, SCM prot, SCM flags, SCM fd, SCM offset),
+	    "mmap addr len [prot [flags [fd [offset]]]]"
+	    "See the man page.  Returns a bytevector."
+	    "Note that the region returned by mmap will NOT be searched "
+	    "by the garbage collector for pointers.\n"
+	    "Defaults:\n"
+	    "  PROT   (logior PROT_READ PROT_WRITE)\n"
+	    "  FLAGS  (logior MAP_ANON MAP_PRIVATE)\n"
+	    "  FD     -1\n"
+	    "  OFFSET 0\n"
+	    "@example\n"
+	    "(define bvec-1MB (mmap 0 #x100000)\n"
+	    "@end example"
+	    )
+#define FUNC_NAME s_scm_mmap
+{
+  SCM bv = scm_mmap_search (addr, len, prot, flags, fd, offset);
+  char *start = (char *) SCM_BYTEVECTOR_CONTENTS (bv);
+  char *limit = start + SCM_BYTEVECTOR_LENGTH (bv);
+
+  /* The mapping itself comes from mmap/search; the only extra step is to
+     hand its address range [start, limit) to GC_exclude_static_roots so
+     that the collector does not scan it for pointers.
+     NOTE(review): confirm that one exclusion per mapping is acceptable
+     to libgc -- nothing in this patch ever removes the exclusion.  */
+  GC_exclude_static_roots (start, limit);
+
+  return bv;
+}
+#undef FUNC_NAME
+
+
+/* Map a whole file read from a name, file port or fd; fstat gives the size.  */
+SCM_DEFINE (scm_mmap_file, "mmap-file", 1, 1, 0,
+            (SCM file, SCM prot),
+	    "This procedure accepts a file in the form of filename,"
+            " file-port or fd.  It returns a bytevector.  It must not"
+            " contain scheme allocated objects as it will not be"
+            " searched for pointers.\n"
+	    "Defaults:\n"
+	    "  prot   \"r\"\n"
+	    "E.g., @code{(define bvec-1MB (mmap-file \"foo.dat\"))}")
+#define FUNC_NAME s_scm_mmap_file
+{
+  int fd, flags, prot_, saved_errno;
+  int fd_is_local = 0;
+  struct stat sb;
+  void *ptr;
+  size_t len = 0;
+  SCM pointer, bvec;
+
+  if (scm_is_string (file)) {
+    char *filename;
+    /* FILENAME is freed when the dynwind context is left, either by the
+       error below or by the explicit scm_dynwind_end.  */
+    scm_dynwind_begin (0);
+    filename = scm_to_locale_string (file);
+    scm_dynwind_free (filename);
+    flags = mm_flags(prot, O_RDONLY);
+    prot_ = mm_prot(prot, PROT_READ);
+    fd = open(filename, flags);
+    if (fd == -1)
+      scm_misc_error ("mmap-file", "could not open file ~S", scm_list_1(file));
+    scm_dynwind_end ();
+    fd_is_local = 1;
+  } else if (SCM_PORTP (file)) {
+    if (! SCM_UNBNDP (prot))
+      scm_misc_error ("mmap-file", "file open, prot arg not allowed", SCM_EOL);
+    if (SCM_PORT_TYPE (file) != scm_file_port_type)
+      scm_misc_error ("mmap-file", "port is not file port", SCM_EOL);
+    fd = SCM_FPORT_FDES (file);
+    /* The port predicates return Scheme booleans, which have to go through
+       scm_is_true before they can serve as C conditions.  */
+    prot_ = (scm_is_true (scm_input_port_p (file)) ? PROT_READ : 0)
+      | (scm_is_true (scm_output_port_p (file)) ? PROT_WRITE : 0);
+    if (prot_ == 0)                     /* not read, not write */
+      abort();
+  } else if (scm_is_integer (file)) {
+    fd = scm_to_signed_integer (file, 0, 1024); /* FIXME: what for 1024? */
+    /* I think fstat() may tell us if the FD is RD,WR,RDWR. */
+    prot_ = PROT_READ;
+  } else {
+    scm_misc_error ("mmap-file", "bad arg for file", SCM_EOL);
+  }
+
+  /* The mapping covers the whole file, so its length is the fstat size.
+     errno is saved across close() so the error reported is the one from
+     fstat or mmap.  */
+  if (fstat (fd, &sb) == -1)
+    ptr = MAP_FAILED;
+  else {
+    len = (size_t) sb.st_size;
+    ptr = mmap (0, len, prot_, MAP_PRIVATE, fd, 0);
+  }
+  saved_errno = errno;
+  if (fd_is_local)
+    close (fd);
+  if (ptr == MAP_FAILED) {
+    errno = saved_errno;
+    SCM_SYSERROR;			/* errno set */
+  }
+
+  pointer = scm_cell (scm_tc7_pointer, (scm_t_bits) ptr);
+  bvec = scm_c_take_typed_bytevector((signed char *) ptr, len,
+				     SCM_ARRAY_ELEMENT_TYPE_VU8, pointer);
+  assert(sizeof(size_t) <= sizeof(void*));  /* length travels as a void*  */
+  scm_i_set_finalizer (SCM2PTR (bvec), mmap_finalizer, (void*) len);
+
+  /* Tell GC not to scan for pointers. */
+  GC_exclude_static_roots(ptr, (char*)ptr + len);
+  return bvec;
+}
+#undef FUNC_NAME
+
+/* Setters for the bytevector length and contents words, duplicated here
+   from bytevectors.c.  */
+#define SCM_BYTEVECTOR_SET_LENGTH(bv, n)                \
+  SCM_SET_CELL_WORD_1 ((bv), (scm_t_bits) (n))
+#define SCM_BYTEVECTOR_SET_CONTENTS(bv, mem)            \
+  SCM_SET_CELL_WORD_2 ((bv), (scm_t_bits) (mem))
+
+SCM_DEFINE (scm_munmap, "munmap", 1, 0, 0,
+            (SCM bvec),
+	    "See the man page. Given bytevector generated by a mmap"
+            " function, unmap the associated memory.  The argument"
+            " will be modified to reflect a zero length bv.")
+#define FUNC_NAME s_scm_munmap
+{
+  void *region;
+  size_t region_len;
+
+  SCM_VALIDATE_BYTEVECTOR (1, bvec);
+
+  /* Remember where the mapping lives before the header is cleared.  */
+  region = (void *) SCM_BYTEVECTOR_CONTENTS (bvec);
+  region_len = SCM_BYTEVECTOR_LENGTH (bvec);
+
+  /* Turn BVEC into an empty bytevector first, then release the pages.  */
+  SCM_BYTEVECTOR_SET_LENGTH (bvec, 0);
+  SCM_BYTEVECTOR_SET_CONTENTS (bvec, NULL);
+
+  if (munmap (region, region_len) == -1)
+    SCM_SYSERROR;			/* errno set */
+
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+/* Add the mmap features and bind PROT_*, MAP_* and PAGE_SIZE in Scheme.  */
+static void init_mmap_api(void) {
+#define MMAP_DEFINE_INT(sym) scm_c_define (#sym, scm_from_int (sym))
+  scm_add_feature("mmap-api");
+  scm_add_feature("mmap-file");
+#ifdef PROT_NONE
+  MMAP_DEFINE_INT (PROT_NONE);
+#endif
+#ifdef PROT_READ
+  MMAP_DEFINE_INT (PROT_READ);
+#endif
+#ifdef PROT_WRITE
+  MMAP_DEFINE_INT (PROT_WRITE);
+#endif
+#ifdef PROT_EXEC
+  MMAP_DEFINE_INT (PROT_EXEC);
+#endif
+#ifdef MAP_ANONYMOUS
+  MMAP_DEFINE_INT (MAP_ANONYMOUS);
+#endif
+#ifdef MAP_ANON
+  MMAP_DEFINE_INT (MAP_ANON);
+#endif
+#ifdef MAP_FILE
+  MMAP_DEFINE_INT (MAP_FILE);
+#endif
+#ifdef MAP_FIXED
+  MMAP_DEFINE_INT (MAP_FIXED);
+#endif
+#ifdef MAP_HASSEMAPHORE
+  MMAP_DEFINE_INT (MAP_HASSEMAPHORE);
+#endif
+#ifdef MAP_PRIVATE
+  MMAP_DEFINE_INT (MAP_PRIVATE);
+#endif
+#ifdef MAP_SHARED
+  MMAP_DEFINE_INT (MAP_SHARED);
+#endif
+#ifdef MAP_NOCACHE
+  MMAP_DEFINE_INT (MAP_NOCACHE);
+#endif
+  scm_c_define ("PAGE_SIZE", scm_from_int (getpagesize()));
+#undef MMAP_DEFINE_INT
+}
+
+#endif /* HAVE_SYS_MMAN_H && HAVE_MAP_ANONYMOUS */
+#endif /* ENABLE_MMAP_API */
+
+\f
+
 void
 scm_init_filesys ()
 {
@@ -1954,6 +2273,10 @@ scm_init_filesys ()
 #endif
 #endif /* HAVE_POSIX */
 
+#ifdef ENABLE_MMAP_API
+  init_mmap_api();
+#endif /* ENABLE_MMAP_API */
+  
   /* `access' symbols.  */
   scm_c_define ("R_OK", scm_from_int (R_OK));
   scm_c_define ("W_OK", scm_from_int (W_OK));
diff --git a/libguile/filesys.h b/libguile/filesys.h
index f870ee434..ddf506ae6 100644
--- a/libguile/filesys.h
+++ b/libguile/filesys.h
@@ -69,6 +69,10 @@ SCM_API SCM scm_dirname (SCM filename);
 SCM_API SCM scm_basename (SCM filename, SCM suffix);
 SCM_API SCM scm_canonicalize_path (SCM path);
 SCM_API SCM scm_sendfile (SCM out, SCM in, SCM count, SCM offset);
+SCM_API SCM scm_mmap_search(SCM addr, SCM len, SCM prot, SCM flags, SCM fd, SCM offset);
+SCM_API SCM scm_mmap(SCM addr, SCM len, SCM prot, SCM flags, SCM fd, SCM offset);
+SCM_API SCM scm_mmap_file(SCM file, SCM prot);
+SCM_API SCM scm_munmap(SCM bvec);
 SCM_INTERNAL SCM scm_i_relativize_path (SCM path, SCM in_path);
 
 SCM_INTERNAL void scm_init_filesys (void);
diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am
index 8158aaf44..cbd7c6568 100644
--- a/test-suite/Makefile.am
+++ b/test-suite/Makefile.am
@@ -76,6 +76,7 @@ SCM_TESTS = tests/00-initial-env.test		\
 	    tests/load.test			\
 	    tests/match.test			\
 	    tests/match.test.upstream		\
+	    tests/mmap-api.test			\
 	    tests/modules.test			\
 	    tests/multilingual.nottest		\
 	    tests/net-db.test			\
diff --git a/test-suite/tests/mmap-api.test b/test-suite/tests/mmap-api.test
new file mode 100644
index 000000000..557d4c8db
--- /dev/null
+++ b/test-suite/tests/mmap-api.test
@@ -0,0 +1,59 @@
+;;;; mmap-api.test --- Tests for the mmap API.    -*- scheme -*-
+;;;;
+;;;; Copyright 2020 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (test-mmap-api)
+  #:use-module (test-suite lib)
+  #:use-module (test-suite guile-test)
+  #:use-module (rnrs bytevectors)
+  )
+
+(define (mmap-test-file)
+  (data-file-name "foo.txt"))
+
+(define mmap-test-string "hello, world")
+
+(define (gen-mmap-test-file)
+  (with-output-to-file (mmap-test-file)
+    (lambda () (display mmap-test-string))))
+
+(when (provided? 'mmap-file)
+
+  (gen-mmap-test-file)
+
+  (with-test-prefix "mmap-file"
+      
+    (pass-if "mmap-file 1"
+      (let ((bv (mmap-file (mmap-test-file))))
+        (string=? (utf8->string bv) mmap-test-string)))
+
+    ))
+
+(when (provided? 'mmap-api)
+
+  (gen-mmap-test-file)
+
+  (with-test-prefix "mmap-api"
+      
+    (pass-if "mmap-api 1"
+      (let ((bv (mmap 0 #x100)))
+        (bytevector-u8-set! bv 0 34)
+        (= (bytevector-u8-ref bv 0) 34)))
+
+    ))
+
+;; --- last line ---

  parent reply	other threads:[~2020-07-04 19:40 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-21 13:39 bug#27782: [wishlist] scheme level mmap Matt Wette
     [not found] ` <handler.27782.B.150064439025677.ack@debbugs.gnu.org>
2017-07-21 14:35   ` bug#27782: Acknowledgement ([wishlist] scheme level mmap) Matt Wette
2017-10-28 15:25 ` bug#27782: mmap for guile 2.2.2 Matt Wette
2017-10-28 17:09   ` Matt Wette
2017-11-24 15:54 ` bug#27782: mmap for guile Matt Wette
2017-11-24 16:22   ` Nala Ginrut
2017-11-24 17:09     ` Matt Wette
2017-11-25 14:41       ` Matt Wette
2017-11-25 16:17         ` Nala Ginrut
2020-07-04 19:40 ` Matt Wette [this message]
2020-07-09 12:45   ` bug#27782: new patch for mma Ludovic Courtès
2022-12-21  1:21 ` bug#27782: patch to add support for mmap and friends Matt Wette
2022-12-22 18:49   ` Matt Wette
2023-01-14  0:49 ` bug#27782: patch " Matt Wette
2023-02-14 14:50 ` bug#27782: mman patch for v3.0.9 Matt Wette
2023-03-01 13:31   ` Matt Wette

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dbe12707-2a97-db9a-08f6-677b843021c1@gmail.com \
    --to=matt.wette@gmail.com \
    --cc=27782@debbugs.gnu.org \
    --cc=ludo@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).