unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* [PATCH] Implement efficient 'scm_unget_bytes' and use it
@ 2013-04-06  6:28 Mark H Weaver
  2013-04-06  7:39 ` Chris K. Jester-Young
  2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
  0 siblings, 2 replies; 10+ messages in thread
From: Mark H Weaver @ 2013-04-06  6:28 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 608 bytes --]

Hello all,

I discovered that 'scm_unget_byte' is kind of dumb.  It puts the bytes
at the beginning of the pushback buffer instead of the end.  This means
that every time you unget a byte, it has to shift up the existing
contents of the buffer, so ungetting N bytes takes O(N^2) time.

This patch implements a function 'scm_unget_bytes' that enables large
buffers to be unread efficiently.  It keeps the bytes at the end of the
buffer instead of the beginning, but it can cope if some external code
manipulates the pushback buffer by hand and puts the bytes at the
beginning.

What do you think?

    Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH] Implement efficient 'scm_unget_bytes' and use it --]
[-- Type: text/x-diff, Size: 8094 bytes --]

From 58c5bda77b29539a805aaa503c7dad9cd5f2ddf6 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sat, 6 Apr 2013 01:42:45 -0400
Subject: [PATCH] Implement efficient 'scm_unget_bytes' and use it.

* libguile/ports.c (scm_i_unget_bytes): New static function.
  (scm_unget_bytes): New API function.
  (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'.
  (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'.

* libguile/ports.h: Add prototype for 'scm_unget_bytes'.

* libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'.
---
 libguile/fports.c |    3 +-
 libguile/ports.c  |  130 ++++++++++++++++++++++++++++++++---------------------
 libguile/ports.h  |    1 +
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/libguile/fports.c b/libguile/fports.c
index f6c3c92..ffe4334 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
 
   if (ndrained > 0)
     /* Put DRAINED back to PORT.  */
-    while (ndrained-- > 0)
-      scm_unget_byte (drained[ndrained], port);
+    scm_unget_bytes ((unsigned char *) drained, ndrained, port);
 
   return SCM_UNSPECIFIED;
 }
diff --git a/libguile/ports.c b/libguile/ports.c
index 47dc165..9068c5c 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
 \f
 
 
-void 
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  size_t old_len, new_len;
 
   scm_i_clear_pending_eof (port);
-  if (pt->read_buf == pt->putback_buf)
-    /* already using the put-back buffer.  */
-    {
-      /* enlarge putback_buf if necessary.  */
-      if (pt->read_end == pt->read_buf + pt->read_buf_size
-	  && pt->read_buf == pt->read_pos)
-	{
-	  size_t new_size = pt->read_buf_size * 2;
-	  unsigned char *tmp = (unsigned char *)
-	    scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-			    "putback buffer");
-
-	  pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-	  pt->read_end = pt->read_buf + pt->read_buf_size;
-	  pt->read_buf_size = pt->putback_buf_size = new_size;
-	}
 
-      /* shift any existing bytes to buffer + 1.  */
-      if (pt->read_pos == pt->read_end)
-	pt->read_end = pt->read_buf + 1;
-      else if (pt->read_pos != pt->read_buf + 1)
-	{
-	  int count = pt->read_end - pt->read_pos;
-
-	  memmove (pt->read_buf + 1, pt->read_pos, count);
-	  pt->read_end = pt->read_buf + 1 + count;
-	}
-
-      pt->read_pos = pt->read_buf;
-    }
-  else
+  if (pt->read_buf != pt->putback_buf)
     /* switch to the put-back buffer.  */
     {
       if (pt->putback_buf == NULL)
 	{
+          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
 	  pt->putback_buf
 	    = (unsigned char *) scm_gc_malloc_pointerless
-	    (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-	  pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+	    (pt->putback_buf_size, "putback buffer");
 	}
 
       pt->saved_read_buf = pt->read_buf;
@@ -1842,12 +1815,59 @@ scm_unget_byte (int c, SCM port)
       pt->saved_read_end = pt->read_end;
       pt->saved_read_buf_size = pt->read_buf_size;
 
-      pt->read_pos = pt->read_buf = pt->putback_buf;
-      pt->read_end = pt->read_buf + 1;
+      /* Put read_pos at the end of the buffer, so that ungets will not
+         have to shift the buffer contents each time.  */
+      pt->read_buf = pt->putback_buf;
+      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
       pt->read_buf_size = pt->putback_buf_size;
     }
 
-  *pt->read_buf = c;
+  old_len = pt->read_end - pt->read_pos;
+  new_len = old_len + len;
+
+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */
+    {
+      size_t new_buf_size;
+      unsigned char *new_buf, *new_end, *new_pos;
+
+      new_buf_size = pt->read_buf_size * 2;
+      if (new_buf_size < new_len)
+        new_buf_size = new_len;
+
+      new_buf = (unsigned char *)
+        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+      /* Put the bytes at the end of the buffer, so that future
+         ungets won't need to shift the buffer.  */
+      new_end = new_buf + new_buf_size;
+      new_pos = new_end - old_len;
+      memcpy (new_pos, pt->read_pos, old_len);
+
+      pt->read_buf = pt->putback_buf = new_buf;
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+    }
+  else if (pt->read_buf + len < pt->read_pos)
+    /* If needed, shift the existing buffer contents up.
+       This should not happen unless some external code
+       manipulates the putback buffer pointers.  */
+    {
+      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+      unsigned char *new_pos = new_end - old_len;
+
+      memmove (new_pos, pt->read_pos, old_len);
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+    }
+
+  /* Move read_pos back and copy the bytes there.  */
+  pt->read_pos -= len;
+  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_flush (port);
 
   if (pt->rw_random)
     pt->rw_active = SCM_PORT_READ;
@@ -1855,6 +1875,21 @@ scm_unget_byte (int c, SCM port)
 #undef FUNC_NAME
 
 void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+  scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+  unsigned char byte;
+
+  byte = c;
+  scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
 scm_ungetc (scm_t_wchar c, SCM port)
 #define FUNC_NAME "scm_ungetc"
 {
@@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
   char result_buf[10];
   const char *encoding;
   size_t len;
-  int i;
 
   if (pt->encoding != NULL)
     encoding = pt->encoding;
@@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
 			"conversion to port encoding failed",
 			SCM_BOOL_F, SCM_MAKE_CHAR (c));
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (result[i], port);
+  scm_i_unget_bytes ((unsigned char *) result, len, port);
 
   if (SCM_UNLIKELY (result != result_buf))
     free (result);
@@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
   SCM result;
   scm_t_wchar c;
   char bytes[SCM_MBCHAR_BUF_SIZE];
-  long column, line, i;
+  long column, line;
   size_t len;
 
   if (SCM_UNBNDP (port))
@@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
 
   err = get_codepoint (port, &c, bytes, &len);
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (bytes[i], port);
+  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
 
   SCM_COL (port) = column;
   SCM_LINUM (port) = line;
@@ -2336,7 +2368,6 @@ static int
 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  int result;
   int i = 0;
 
   while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
@@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
       pt->read_pos++;
       i++;
     }
-
-  result = (i == len);
-
-  while (i > 0)
-    scm_unget_byte (bytes[--i], port);
-
-  return result;
+  scm_i_unget_bytes (bytes, i, port);
+  return (i == len);
 }
 
 static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
diff --git a/libguile/ports.h b/libguile/ports.h
index ca5bf2f..39317f8 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
 SCM_API void scm_flush (SCM port);
 SCM_API void scm_end_input (SCM port);
 SCM_API int scm_fill_input (SCM port);
+SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
 SCM_API void scm_unget_byte (int c, SCM port);
 SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
 SCM_API void scm_ungets (const char *s, int n, SCM port);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and use it
  2013-04-06  6:28 [PATCH] Implement efficient 'scm_unget_bytes' and use it Mark H Weaver
@ 2013-04-06  7:39 ` Chris K. Jester-Young
  2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
  1 sibling, 0 replies; 10+ messages in thread
From: Chris K. Jester-Young @ 2013-04-06  7:39 UTC (permalink / raw)
  To: guile-devel

On Sat, Apr 06, 2013 at 02:28:14AM -0400, Mark H Weaver wrote:
> This patch implements a function 'scm_unget_bytes' that enables large
> buffers to be unread efficiently.  It keeps the bytes at the end of the
> buffer instead of the beginning, but it can cope if some external code
> manipulates the pushback buffer by hand and puts the bytes at the
> beginning.

Looks good to me! I did write a comment on IRC about how the following
lines

+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */

used inconsistent references to "read_buf" and "putback buffer", but
I don't think there's a very good solution for that, other than just
making the human reader aware that read_buf _is_ putback_buf in this
context.

Cheers,
Chris.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-06  6:28 [PATCH] Implement efficient 'scm_unget_bytes' and use it Mark H Weaver
  2013-04-06  7:39 ` Chris K. Jester-Young
@ 2013-04-06  7:47 ` Mark H Weaver
  2013-04-06 10:01   ` Mike Gran
  2013-04-06 23:07   ` Ludovic Courtès
  1 sibling, 2 replies; 10+ messages in thread
From: Mark H Weaver @ 2013-04-06  7:47 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 902 bytes --]

Mark H Weaver <mhw@netris.org> writes:

> I discovered that 'scm_unget_byte' is kind of dumb.  It puts the bytes
> at the beginning of the pushback buffer instead of the end.  This means
> that every time you unget a byte, it has to shift up the existing
> contents of the buffer, so ungetting N bytes takes O(N^2) time.
>
> This patch implements a function 'scm_unget_bytes' that enables large
> buffers to be unread efficiently.  It keeps the bytes at the end of the
> buffer instead of the beginning, but it can cope if some external code
> manipulates the pushback buffer by hand and puts the bytes at the
> beginning.

Here's an improved patch that also exports 'unget-bytevector' from
(ice-9 binary-ports).  I've used it to unget 15 megabytes, and it was
quite fast.  Unfortunately, I'm at a bit of a loss of where to document
it in the manual.

Comments and suggestions solicited.

      Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' --]
[-- Type: text/x-diff, Size: 11849 bytes --]

From 00c36fdd2e4d94a37fa416e3bc8436f66bba612a Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sat, 6 Apr 2013 01:42:45 -0400
Subject: [PATCH] Implement efficient 'scm_unget_bytes' and
 'unget-bytevector'.

* libguile/ports.c (scm_i_unget_bytes): New static function.
  (scm_unget_bytes): New API function.
  (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'.
  (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'.

* libguile/ports.h: Add prototype for 'scm_unget_bytes'.

* libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'.

* libguile/r6rs-ports.c (scm_unget_bytevector): New procedure.

* module/ice-9/binary-ports.scm (unget-bytevector): New export.

* test-suite/tests/ports.test ("unget-bytevector"): Add test.
---
 libguile/fports.c             |    3 +-
 libguile/ports.c              |  130 ++++++++++++++++++++++++-----------------
 libguile/ports.h              |    1 +
 libguile/r6rs-ports.c         |   43 ++++++++++++++
 module/ice-9/binary-ports.scm |    1 +
 test-suite/tests/ports.test   |   21 ++++++-
 6 files changed, 143 insertions(+), 56 deletions(-)

diff --git a/libguile/fports.c b/libguile/fports.c
index f6c3c92..ffe4334 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
 
   if (ndrained > 0)
     /* Put DRAINED back to PORT.  */
-    while (ndrained-- > 0)
-      scm_unget_byte (drained[ndrained], port);
+    scm_unget_bytes ((unsigned char *) drained, ndrained, port);
 
   return SCM_UNSPECIFIED;
 }
diff --git a/libguile/ports.c b/libguile/ports.c
index 47dc165..9068c5c 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
 \f
 
 
-void 
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  size_t old_len, new_len;
 
   scm_i_clear_pending_eof (port);
-  if (pt->read_buf == pt->putback_buf)
-    /* already using the put-back buffer.  */
-    {
-      /* enlarge putback_buf if necessary.  */
-      if (pt->read_end == pt->read_buf + pt->read_buf_size
-	  && pt->read_buf == pt->read_pos)
-	{
-	  size_t new_size = pt->read_buf_size * 2;
-	  unsigned char *tmp = (unsigned char *)
-	    scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-			    "putback buffer");
-
-	  pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-	  pt->read_end = pt->read_buf + pt->read_buf_size;
-	  pt->read_buf_size = pt->putback_buf_size = new_size;
-	}
 
-      /* shift any existing bytes to buffer + 1.  */
-      if (pt->read_pos == pt->read_end)
-	pt->read_end = pt->read_buf + 1;
-      else if (pt->read_pos != pt->read_buf + 1)
-	{
-	  int count = pt->read_end - pt->read_pos;
-
-	  memmove (pt->read_buf + 1, pt->read_pos, count);
-	  pt->read_end = pt->read_buf + 1 + count;
-	}
-
-      pt->read_pos = pt->read_buf;
-    }
-  else
+  if (pt->read_buf != pt->putback_buf)
     /* switch to the put-back buffer.  */
     {
       if (pt->putback_buf == NULL)
 	{
+          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
 	  pt->putback_buf
 	    = (unsigned char *) scm_gc_malloc_pointerless
-	    (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-	  pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+	    (pt->putback_buf_size, "putback buffer");
 	}
 
       pt->saved_read_buf = pt->read_buf;
@@ -1842,12 +1815,59 @@ scm_unget_byte (int c, SCM port)
       pt->saved_read_end = pt->read_end;
       pt->saved_read_buf_size = pt->read_buf_size;
 
-      pt->read_pos = pt->read_buf = pt->putback_buf;
-      pt->read_end = pt->read_buf + 1;
+      /* Put read_pos at the end of the buffer, so that ungets will not
+         have to shift the buffer contents each time.  */
+      pt->read_buf = pt->putback_buf;
+      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
       pt->read_buf_size = pt->putback_buf_size;
     }
 
-  *pt->read_buf = c;
+  old_len = pt->read_end - pt->read_pos;
+  new_len = old_len + len;
+
+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */
+    {
+      size_t new_buf_size;
+      unsigned char *new_buf, *new_end, *new_pos;
+
+      new_buf_size = pt->read_buf_size * 2;
+      if (new_buf_size < new_len)
+        new_buf_size = new_len;
+
+      new_buf = (unsigned char *)
+        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+      /* Put the bytes at the end of the buffer, so that future
+         ungets won't need to shift the buffer.  */
+      new_end = new_buf + new_buf_size;
+      new_pos = new_end - old_len;
+      memcpy (new_pos, pt->read_pos, old_len);
+
+      pt->read_buf = pt->putback_buf = new_buf;
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+    }
+  else if (pt->read_buf + len < pt->read_pos)
+    /* If needed, shift the existing buffer contents up.
+       This should not happen unless some external code
+       manipulates the putback buffer pointers.  */
+    {
+      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+      unsigned char *new_pos = new_end - old_len;
+
+      memmove (new_pos, pt->read_pos, old_len);
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+    }
+
+  /* Move read_pos back and copy the bytes there.  */
+  pt->read_pos -= len;
+  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_flush (port);
 
   if (pt->rw_random)
     pt->rw_active = SCM_PORT_READ;
@@ -1855,6 +1875,21 @@ scm_unget_byte (int c, SCM port)
 #undef FUNC_NAME
 
 void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+  scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+  unsigned char byte;
+
+  byte = c;
+  scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
 scm_ungetc (scm_t_wchar c, SCM port)
 #define FUNC_NAME "scm_ungetc"
 {
@@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
   char result_buf[10];
   const char *encoding;
   size_t len;
-  int i;
 
   if (pt->encoding != NULL)
     encoding = pt->encoding;
@@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
 			"conversion to port encoding failed",
 			SCM_BOOL_F, SCM_MAKE_CHAR (c));
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (result[i], port);
+  scm_i_unget_bytes ((unsigned char *) result, len, port);
 
   if (SCM_UNLIKELY (result != result_buf))
     free (result);
@@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
   SCM result;
   scm_t_wchar c;
   char bytes[SCM_MBCHAR_BUF_SIZE];
-  long column, line, i;
+  long column, line;
   size_t len;
 
   if (SCM_UNBNDP (port))
@@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
 
   err = get_codepoint (port, &c, bytes, &len);
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (bytes[i], port);
+  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
 
   SCM_COL (port) = column;
   SCM_LINUM (port) = line;
@@ -2336,7 +2368,6 @@ static int
 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  int result;
   int i = 0;
 
   while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
@@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
       pt->read_pos++;
       i++;
     }
-
-  result = (i == len);
-
-  while (i > 0)
-    scm_unget_byte (bytes[--i], port);
-
-  return result;
+  scm_i_unget_bytes (bytes, i, port);
+  return (i == len);
 }
 
 static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
diff --git a/libguile/ports.h b/libguile/ports.h
index ca5bf2f..39317f8 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
 SCM_API void scm_flush (SCM port);
 SCM_API void scm_end_input (SCM port);
 SCM_API int scm_fill_input (SCM port);
+SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
 SCM_API void scm_unget_byte (int c, SCM port);
 SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
 SCM_API void scm_ungets (const char *s, int n, SCM port);
diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index 48f9f26..aa3c935 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -714,6 +714,49 @@ SCM_DEFINE (scm_put_bytevector, "put-bytevector", 2, 2, 0,
 }
 #undef FUNC_NAME
 
+SCM_DEFINE (scm_unget_bytevector, "unget-bytevector", 2, 2, 0,
+	    (SCM port, SCM bv, SCM start, SCM count),
+	    "Unget the contents of @var{bv} to @var{port}, optionally "
+	    "starting at index @var{start} and limiting to @var{count} "
+	    "octets.")
+#define FUNC_NAME s_scm_unget_bytevector
+{
+  char *c_bv;
+  unsigned c_start, c_count, c_len;
+
+  SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
+  SCM_VALIDATE_BYTEVECTOR (2, bv);
+
+  c_len = SCM_BYTEVECTOR_LENGTH (bv);
+  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);
+
+  if (!scm_is_eq (start, SCM_UNDEFINED))
+    {
+      c_start = scm_to_uint (start);
+
+      if (!scm_is_eq (count, SCM_UNDEFINED))
+	{
+	  c_count = scm_to_uint (count);
+	  if (SCM_UNLIKELY (c_start + c_count > c_len))
+	    scm_out_of_range (FUNC_NAME, count);
+	}
+      else
+	{
+	  if (SCM_UNLIKELY (c_start >= c_len))
+	    scm_out_of_range (FUNC_NAME, start);
+	  else
+	    c_count = c_len - c_start;
+	}
+    }
+  else
+    c_start = 0, c_count = c_len;
+
+  scm_unget_bytes ((unsigned char *) c_bv + c_start, c_count, port);
+
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
 
 \f
 /* Bytevector output port ("bop" for short).  */
diff --git a/module/ice-9/binary-ports.scm b/module/ice-9/binary-ports.scm
index c07900b..cd7d155 100644
--- a/module/ice-9/binary-ports.scm
+++ b/module/ice-9/binary-ports.scm
@@ -40,6 +40,7 @@
             get-string-n!
             put-u8
             put-bytevector
+            unget-bytevector
             open-bytevector-output-port
             make-custom-binary-output-port))
 
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index 0dbd3b2..7d16399 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -24,8 +24,12 @@
   #:use-module (ice-9 popen)
   #:use-module (ice-9 rdelim)
   #:use-module (rnrs bytevectors)
-  #:use-module ((rnrs io ports) #:select (open-bytevector-input-port
-                                          open-bytevector-output-port)))
+  #:use-module ((ice-9 binary-ports) #:select (open-bytevector-input-port
+                                               open-bytevector-output-port
+                                               put-bytevector
+                                               get-bytevector-n
+                                               get-bytevector-all
+                                               unget-bytevector)))
 
 (define (display-line . args)
   (for-each display args)
@@ -1236,6 +1240,19 @@
 
 \f
 
+(pass-if-equal "unget-bytevector"
+    #vu8(10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 200 201 202 203
+            1 2 3 4 251 253 254 255)
+  (let ((port (open-bytevector-input-port #vu8(1 2 3 4 251 253 254 255))))
+    (unget-bytevector port #vu8(200 201 202 203))
+    (unget-bytevector port #vu8(20 21 22 23 24))
+    (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 4)
+    (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 2 2)
+    (unget-bytevector port #vu8(10 11))
+    (get-bytevector-all port)))
+
+\f
+
 (with-test-prefix "unicode byte-order marks (BOMs)"
 
   (define (bv-read-test* encoding bv proc)
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
@ 2013-04-06 10:01   ` Mike Gran
  2013-04-06 14:08     ` Mark H Weaver
  2013-04-06 23:07   ` Ludovic Courtès
  1 sibling, 1 reply; 10+ messages in thread
From: Mike Gran @ 2013-04-06 10:01 UTC (permalink / raw)
  To: Mark H Weaver, guile-devel@gnu.org

>>  I discovered that 'scm_unget_byte' is kind of dumb.  It puts the 
> bytes
>>  at the beginning of the pushback buffer instead of the end.  This means
>>  that every time you unget a byte, it has to shift up the existing
>>  contents of the buffer, so ungetting N bytes takes O(N^2) time.
>> 
>>  This patch implements a function 'scm_unget_bytes' that enables 
> large
>>  buffers to be unread efficiently.  It keeps the bytes at the end of the
>>  buffer instead of the beginning, but it can cope if some external code
>>  manipulates the pushback buffer by hand and puts the bytes at the
>>  beginning.
 
Have you checked that it works with gnome-vfs-port in guile-gnome?
 
It is true that none of Guile's clients ever look at putback_buf?
 
-Mike 



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-06 10:01   ` Mike Gran
@ 2013-04-06 14:08     ` Mark H Weaver
  0 siblings, 0 replies; 10+ messages in thread
From: Mark H Weaver @ 2013-04-06 14:08 UTC (permalink / raw)
  To: Mike Gran; +Cc: guile-devel@gnu.org

Mike Gran <spk121@yahoo.com> writes:

> It is true that none of Guile's clients ever look at putback_buf?

I'm assuming the worst: that Guile's clients might look at, and
manipulate the 'putback_buf' directly.  The way I'm filling
'putback_buf' can *already* happen today.

Here's how it can happen today: unget enough bytes to exactly fill
'putback_buf', and then read some of those bytes.  That will result in
*exactly* the same putback buffer state as what I'm doing here.

> Have you checked that it works with gnome-vfs-port in guile-gnome?

*sigh*  I just looked at that code, and it doesn't do anything the
least bit unusual with the buffers.  No, I didn't run a test with that
code, but even if I did, that wouldn't answer the question of whether
there exists code out there that would be broken by this change.  For
that, only the logic above can help.

Again, the state I'm putting the buffers in can already happen today.
Any code that can't cope with that state is already a ticking time bomb.

      Mark



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
  2013-04-06 10:01   ` Mike Gran
@ 2013-04-06 23:07   ` Ludovic Courtès
  2013-04-07  7:19     ` Mark H Weaver
  1 sibling, 1 reply; 10+ messages in thread
From: Ludovic Courtès @ 2013-04-06 23:07 UTC (permalink / raw)
  To: guile-devel

Mark H Weaver <mhw@netris.org> skribis:

> Mark H Weaver <mhw@netris.org> writes:
>
>> I discovered that 'scm_unget_byte' is kind of dumb.  It puts the bytes
>> at the beginning of the pushback buffer instead of the end.  This means
>> that every time you unget a byte, it has to shift up the existing
>> contents of the buffer, so ungetting N bytes takes O(N^2) time.
>>
>> This patch implements a function 'scm_unget_bytes' that enables large
>> buffers to be unread efficiently.  It keeps the bytes at the end of the
>> buffer instead of the beginning, but it can cope if some external code
>> manipulates the pushback buffer by hand and puts the bytes at the
>> beginning.
>
> Here's an improved patch that also exports 'unget-bytevector' from
> (ice-9 binary-ports).

LGTM.

> I've used it to unget 15 megabytes, and it was quite fast.
> Unfortunately, I'm at a bit of a loss of where to document it in the
> manual.

What about adding a sentence to mention (ice-9 bytevectors) under
“Bytevectors”, and then ‘unget-bytevector’ under “R6RS Binary Input”?

Ludo’.




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-06 23:07   ` Ludovic Courtès
@ 2013-04-07  7:19     ` Mark H Weaver
  2013-04-07  9:28       ` Ludovic Courtès
  0 siblings, 1 reply; 10+ messages in thread
From: Mark H Weaver @ 2013-04-07  7:19 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guile-devel

Hi Ludovic,

ludo@gnu.org (Ludovic Courtès) writes:

> Mark H Weaver <mhw@netris.org> skribis:
>
>> Here's an improved patch that also exports 'unget-bytevector' from
>> (ice-9 binary-ports).
>
> LGTM.

Excellent :)

>> I've used it to unget 15 megabytes, and it was quite fast.
>> Unfortunately, I'm at a bit of a loss of where to document it in the
>> manual.
>
> What about adding a sentence to mention (ice-9 bytevectors) under
> “Bytevectors”, and then ‘unget-bytevector’ under “R6RS Binary Input”?

The problem is that "R6RS Binary Input" describes procedures exported
from (rnrs io ports), but 'unget-bytevector' is not exported from that
module, nor should it be.

'unget-bytevector' is exported only from (ice-9 binary-ports), and there
is no section of the manual that describes that module.  It is only
mentioned briefly at the end of 6.14.10 (R6RS I/O Ports) as follows:

   A subset of the `(rnrs io ports)' module is provided by the `(ice-9
   binary-ports)' module.  It contains binary input/output procedures
   and does not rely on R6RS support.

Unfortunately, this patch invalidates the above claim that (ice-9
binary-ports) is a subset of (rnrs io ports).

I see no easy fix.  Maybe the documentation for the procedures exported
from (ice-9 binary-ports) should be moved into a separate "Binary I/O"
subsection of 6.14 (Input and Output), and the corresponding procedure
entries in 6.14.10 (R6RS I/O Ports) should cross-reference the new
subsection?

What do you think?

    Thanks!
      Mark



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-07  7:19     ` Mark H Weaver
@ 2013-04-07  9:28       ` Ludovic Courtès
  2013-04-07 13:01         ` Mark H Weaver
  0 siblings, 1 reply; 10+ messages in thread
From: Ludovic Courtès @ 2013-04-07  9:28 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guile-devel

Mark H Weaver <mhw@netris.org> skribis:

> ludo@gnu.org (Ludovic Courtès) writes:
>
>> Mark H Weaver <mhw@netris.org> skribis:

[...]

>>> I've used it to unget 15 megabytes, and it was quite fast.
>>> Unfortunately, I'm at a bit of a loss of where to document it in the
>>> manual.
>>
>> What about adding a sentence to mention (ice-9 bytevectors) under
>> “Bytevectors”, and then ‘unget-bytevector’ under “R6RS Binary Input”?
>
> The problem is that "R6RS Binary Input" describes procedures exported
> from (rnrs io ports), but 'unget-bytevector' is not exported from that
> module, nor should it be.

Of course.

> 'unget-bytevector' is exported only from (ice-9 binary-ports), and there
> is no section of the manual that describes that module.  It is only
> mentioned briefly at the end of 6.14.10 (R6RS I/O Ports) as follows:
>
>    A subset of the `(rnrs io ports)' module is provided by the `(ice-9
>    binary-ports)' module.  It contains binary input/output procedures
>    and does not rely on R6RS support.
>
> Unfortunately, this patch invalidates the above claim that (ice-9
> binary-ports) is a subset of (rnrs io ports).
>
> I see no easy fix.  Maybe the documentation for the procedures exported
> from (ice-9 binary-ports) should be moved into a separate "Binary I/O"
> subsection of 6.14 (Input and Output), and the corresponding procedure
> entries in 6.14.10 (R6RS I/O Ports) should cross-reference the new
> subsection?

Yes, that’s one possibility.

A faster possibility (aka. let’s release!) would be to add something
like that at the end of “Binary Input”:

  The (ice-9 binary-ports) module provides the following procedure as an
  extension to (rnrs io ports):

WDYT?

Ludo’.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-07  9:28       ` Ludovic Courtès
@ 2013-04-07 13:01         ` Mark H Weaver
  2013-04-07 14:25           ` Ludovic Courtès
  0 siblings, 1 reply; 10+ messages in thread
From: Mark H Weaver @ 2013-04-07 13:01 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guile-devel

[-- Attachment #1: Type: text/plain, Size: 395 bytes --]

Hi Ludovic!

ludo@gnu.org (Ludovic Courtès) writes:

> A faster possibility (aka. let’s release!) would be to add something
> like that at the end of “Binary Input”:
>
>   The (ice-9 binary-ports) module provides the following procedure as an
>   extension to (rnrs io ports):

I like this idea, and have done so.  The updated patch is attached.

    Thanks!
      Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' --]
[-- Type: text/x-diff, Size: 13842 bytes --]

From 96f6a960c2c0f0fd4037054f64aed97ea986ecd8 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sat, 6 Apr 2013 01:42:45 -0400
Subject: [PATCH 4/4] Implement efficient 'scm_unget_bytes' and
 'unget-bytevector'.

* libguile/ports.c (scm_i_unget_bytes): New static function.
  (scm_unget_bytes): New API function.
  (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'.
  (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'.

* libguile/ports.h: Add prototype for 'scm_unget_bytes'.

* libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'.

* libguile/r6rs-ports.c (scm_unget_bytevector): New procedure.

* module/ice-9/binary-ports.scm (unget-bytevector): New export.

* doc/ref/api-io.texi (R6RS Binary Input): Add documentation.
  (R6RS I/O Ports): Update brief description of (ice-9 binary-ports) to
  reflect the new reality: it is no longer a subset of (rnrs io ports).

* test-suite/tests/ports.test ("unget-bytevector"): Add test.
---
 doc/ref/api-io.texi           |   19 +++++-
 libguile/fports.c             |    3 +-
 libguile/ports.c              |  130 ++++++++++++++++++++++++-----------------
 libguile/ports.h              |    1 +
 libguile/r6rs-ports.c         |   43 ++++++++++++++
 module/ice-9/binary-ports.scm |    3 +-
 test-suite/tests/ports.test   |   17 +++++-
 7 files changed, 157 insertions(+), 59 deletions(-)

diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi
index da57328..19e4a2f 100644
--- a/doc/ref/api-io.texi
+++ b/doc/ref/api-io.texi
@@ -1240,9 +1240,10 @@ possible.
 * R6RS Textual Output::         Textual output.
 @end menu
 
-A subset of the @code{(rnrs io ports)} module is provided by the
-@code{(ice-9 binary-ports)} module.  It contains binary input/output
-procedures and does not rely on R6RS support.
+A subset of the @code{(rnrs io ports)} module, plus one non-standard
+procedure @code{unget-bytevector} (@pxref{R6RS Binary Input}), is
+provided by the @code{(ice-9 binary-ports)} module.  It contains binary
+input/output procedures and does not rely on R6RS support.
 
 @node R6RS File Names
 @subsubsection File Names
@@ -1872,6 +1873,18 @@ reached.  Return either a new bytevector containing the data read or the
 end-of-file object (if no data were available).
 @end deffn
 
+The @code{(ice-9 binary-ports)} module provides the following procedure
+as an extension to @code{(rnrs io ports)}:
+
+@deffn {Scheme Procedure} unget-bytevector port bv [start [count]]
+@deffnx {C Function} scm_unget_bytevector (port, bv, start, count)
+Place the contents of @var{bv} in @var{port}, optionally starting at
+index @var{start} and limiting to @var{count} octets, so that its bytes
+will be read from left-to-right as the next bytes from @var{port} during
+subsequent read operations.  If called multiple times, the unread bytes
+will be read again in last-in first-out order.
+@end deffn
+
 @node R6RS Textual Input
 @subsubsection Textual Input
 
diff --git a/libguile/fports.c b/libguile/fports.c
index 4fc614e..1c6c509 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
 
   if (ndrained > 0)
     /* Put DRAINED back to PORT.  */
-    while (ndrained-- > 0)
-      scm_unget_byte (drained[ndrained], port);
+    scm_unget_bytes ((unsigned char *) drained, ndrained, port);
 
   return SCM_UNSPECIFIED;
 }
diff --git a/libguile/ports.c b/libguile/ports.c
index 47dc165..9068c5c 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
 \f
 
 
-void 
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  size_t old_len, new_len;
 
   scm_i_clear_pending_eof (port);
-  if (pt->read_buf == pt->putback_buf)
-    /* already using the put-back buffer.  */
-    {
-      /* enlarge putback_buf if necessary.  */
-      if (pt->read_end == pt->read_buf + pt->read_buf_size
-	  && pt->read_buf == pt->read_pos)
-	{
-	  size_t new_size = pt->read_buf_size * 2;
-	  unsigned char *tmp = (unsigned char *)
-	    scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-			    "putback buffer");
-
-	  pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-	  pt->read_end = pt->read_buf + pt->read_buf_size;
-	  pt->read_buf_size = pt->putback_buf_size = new_size;
-	}
 
-      /* shift any existing bytes to buffer + 1.  */
-      if (pt->read_pos == pt->read_end)
-	pt->read_end = pt->read_buf + 1;
-      else if (pt->read_pos != pt->read_buf + 1)
-	{
-	  int count = pt->read_end - pt->read_pos;
-
-	  memmove (pt->read_buf + 1, pt->read_pos, count);
-	  pt->read_end = pt->read_buf + 1 + count;
-	}
-
-      pt->read_pos = pt->read_buf;
-    }
-  else
+  if (pt->read_buf != pt->putback_buf)
     /* switch to the put-back buffer.  */
     {
       if (pt->putback_buf == NULL)
 	{
+          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
 	  pt->putback_buf
 	    = (unsigned char *) scm_gc_malloc_pointerless
-	    (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-	  pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+	    (pt->putback_buf_size, "putback buffer");
 	}
 
       pt->saved_read_buf = pt->read_buf;
@@ -1842,12 +1815,59 @@ scm_unget_byte (int c, SCM port)
       pt->saved_read_end = pt->read_end;
       pt->saved_read_buf_size = pt->read_buf_size;
 
-      pt->read_pos = pt->read_buf = pt->putback_buf;
-      pt->read_end = pt->read_buf + 1;
+      /* Put read_pos at the end of the buffer, so that ungets will not
+         have to shift the buffer contents each time.  */
+      pt->read_buf = pt->putback_buf;
+      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
       pt->read_buf_size = pt->putback_buf_size;
     }
 
-  *pt->read_buf = c;
+  old_len = pt->read_end - pt->read_pos;
+  new_len = old_len + len;
+
+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */
+    {
+      size_t new_buf_size;
+      unsigned char *new_buf, *new_end, *new_pos;
+
+      new_buf_size = pt->read_buf_size * 2;
+      if (new_buf_size < new_len)
+        new_buf_size = new_len;
+
+      new_buf = (unsigned char *)
+        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+      /* Put the bytes at the end of the buffer, so that future
+         ungets won't need to shift the buffer.  */
+      new_end = new_buf + new_buf_size;
+      new_pos = new_end - old_len;
+      memcpy (new_pos, pt->read_pos, old_len);
+
+      pt->read_buf = pt->putback_buf = new_buf;
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+    }
+  else if (pt->read_buf + len < pt->read_pos)
+    /* If needed, shift the existing buffer contents up.
+       This should not happen unless some external code
+       manipulates the putback buffer pointers.  */
+    {
+      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+      unsigned char *new_pos = new_end - old_len;
+
+      memmove (new_pos, pt->read_pos, old_len);
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+    }
+
+  /* Move read_pos back and copy the bytes there.  */
+  pt->read_pos -= len;
+  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_flush (port);
 
   if (pt->rw_random)
     pt->rw_active = SCM_PORT_READ;
@@ -1855,6 +1875,21 @@ scm_unget_byte (int c, SCM port)
 #undef FUNC_NAME
 
 void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+  scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+  unsigned char byte;
+
+  byte = c;
+  scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
 scm_ungetc (scm_t_wchar c, SCM port)
 #define FUNC_NAME "scm_ungetc"
 {
@@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
   char result_buf[10];
   const char *encoding;
   size_t len;
-  int i;
 
   if (pt->encoding != NULL)
     encoding = pt->encoding;
@@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
 			"conversion to port encoding failed",
 			SCM_BOOL_F, SCM_MAKE_CHAR (c));
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (result[i], port);
+  scm_i_unget_bytes ((unsigned char *) result, len, port);
 
   if (SCM_UNLIKELY (result != result_buf))
     free (result);
@@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
   SCM result;
   scm_t_wchar c;
   char bytes[SCM_MBCHAR_BUF_SIZE];
-  long column, line, i;
+  long column, line;
   size_t len;
 
   if (SCM_UNBNDP (port))
@@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
 
   err = get_codepoint (port, &c, bytes, &len);
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (bytes[i], port);
+  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
 
   SCM_COL (port) = column;
   SCM_LINUM (port) = line;
@@ -2336,7 +2368,6 @@ static int
 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  int result;
   int i = 0;
 
   while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
@@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
       pt->read_pos++;
       i++;
     }
-
-  result = (i == len);
-
-  while (i > 0)
-    scm_unget_byte (bytes[--i], port);
-
-  return result;
+  scm_i_unget_bytes (bytes, i, port);
+  return (i == len);
 }
 
 static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
diff --git a/libguile/ports.h b/libguile/ports.h
index ca5bf2f..39317f8 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
 SCM_API void scm_flush (SCM port);
 SCM_API void scm_end_input (SCM port);
 SCM_API int scm_fill_input (SCM port);
+SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
 SCM_API void scm_unget_byte (int c, SCM port);
 SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
 SCM_API void scm_ungets (const char *s, int n, SCM port);
diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index 48f9f26..fecc5bd 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -714,6 +714,49 @@ SCM_DEFINE (scm_put_bytevector, "put-bytevector", 2, 2, 0,
 }
 #undef FUNC_NAME
 
+SCM_DEFINE (scm_unget_bytevector, "unget-bytevector", 2, 2, 0,
+	    (SCM port, SCM bv, SCM start, SCM count),
+	    "Unget the contents of @var{bv} to @var{port}, optionally "
+	    "starting at index @var{start} and limiting to @var{count} "
+	    "octets.")
+#define FUNC_NAME s_scm_unget_bytevector
+{
+  unsigned char *c_bv;
+  size_t c_start, c_count, c_len;
+
+  SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
+  SCM_VALIDATE_BYTEVECTOR (2, bv);
+
+  c_len = SCM_BYTEVECTOR_LENGTH (bv);
+  c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
+
+  if (!scm_is_eq (start, SCM_UNDEFINED))
+    {
+      c_start = scm_to_size_t (start);
+
+      if (!scm_is_eq (count, SCM_UNDEFINED))
+	{
+	  c_count = scm_to_size_t (count);
+	  if (SCM_UNLIKELY (c_start + c_count > c_len))
+	    scm_out_of_range (FUNC_NAME, count);
+	}
+      else
+	{
+	  if (SCM_UNLIKELY (c_start >= c_len))
+	    scm_out_of_range (FUNC_NAME, start);
+	  else
+	    c_count = c_len - c_start;
+	}
+    }
+  else
+    c_start = 0, c_count = c_len;
+
+  scm_unget_bytes (c_bv + c_start, c_count, port);
+
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
 
 \f
 /* Bytevector output port ("bop" for short).  */
diff --git a/module/ice-9/binary-ports.scm b/module/ice-9/binary-ports.scm
index c07900b..9d6c945 100644
--- a/module/ice-9/binary-ports.scm
+++ b/module/ice-9/binary-ports.scm
@@ -1,6 +1,6 @@
 ;;;; binary-ports.scm --- Binary IO on ports
 
-;;;;	Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+;;;;	Copyright (C) 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -40,6 +40,7 @@
             get-string-n!
             put-u8
             put-bytevector
+            unget-bytevector
             open-bytevector-output-port
             make-custom-binary-output-port))
 
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index 313cd36..8e3df5b 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -27,7 +27,9 @@
   #:use-module ((ice-9 binary-ports) #:select (open-bytevector-input-port
                                                open-bytevector-output-port
                                                put-bytevector
-                                               get-bytevector-all)))
+                                               get-bytevector-n
+                                               get-bytevector-all
+                                               unget-bytevector)))
 
 (define (display-line . args)
   (for-each display args)
@@ -1518,6 +1520,19 @@
 
 \f
 
+(pass-if-equal "unget-bytevector"
+    #vu8(10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 200 201 202 203
+            1 2 3 4 251 253 254 255)
+  (let ((port (open-bytevector-input-port #vu8(1 2 3 4 251 253 254 255))))
+    (unget-bytevector port #vu8(200 201 202 203))
+    (unget-bytevector port #vu8(20 21 22 23 24))
+    (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 4)
+    (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 2 2)
+    (unget-bytevector port #vu8(10 11))
+    (get-bytevector-all port)))
+
+\f
+
 (with-test-prefix "unicode byte-order marks (BOMs)"
 
   (define (bv-read-test* encoding bv proc)
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector'
  2013-04-07 13:01         ` Mark H Weaver
@ 2013-04-07 14:25           ` Ludovic Courtès
  0 siblings, 0 replies; 10+ messages in thread
From: Ludovic Courtès @ 2013-04-07 14:25 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guile-devel

Mark H Weaver <mhw@netris.org> skribis:

> ludo@gnu.org (Ludovic Courtès) writes:
>
>> A faster possibility (aka. let’s release!) would be to add something
>> like that at the end of “Binary Input”:
>>
>>   The (ice-9 binary-ports) module provides the following procedure as an
>>   extension to (rnrs io ports):
>
> I like this idea, and have done so.  The updated patch is attached.

Perfect, please push!

Ludo’.



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-04-07 14:25 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-06  6:28 [PATCH] Implement efficient 'scm_unget_bytes' and use it Mark H Weaver
2013-04-06  7:39 ` Chris K. Jester-Young
2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
2013-04-06 10:01   ` Mike Gran
2013-04-06 14:08     ` Mark H Weaver
2013-04-06 23:07   ` Ludovic Courtès
2013-04-07  7:19     ` Mark H Weaver
2013-04-07  9:28       ` Ludovic Courtès
2013-04-07 13:01         ` Mark H Weaver
2013-04-07 14:25           ` Ludovic Courtès

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).