unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* [PATCH] Implement efficient 'scm_unget_bytes' and use it
@ 2013-04-06  6:28 Mark H Weaver
  2013-04-06  7:39 ` Chris K. Jester-Young
  2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
  0 siblings, 2 replies; 10+ messages in thread
From: Mark H Weaver @ 2013-04-06  6:28 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 608 bytes --]

Hello all,

I discovered that 'scm_unget_byte' is kind of dumb.  It puts the bytes
at the beginning of the pushback buffer instead of the end.  This means
that every time you unget a byte, it has to shift up the existing
contents of the buffer, so ungetting N bytes takes O(N^2) time.

This patch implements a function 'scm_unget_bytes' that enables large
buffers to be unread efficiently.  It keeps the bytes at the end of the
buffer instead of the beginning, but it can cope if some external code
manipulates the pushback buffer by hand and puts the bytes at the
beginning.

What do you think?

    Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH] Implement efficient 'scm_unget_bytes' and use it --]
[-- Type: text/x-diff, Size: 8094 bytes --]

From 58c5bda77b29539a805aaa503c7dad9cd5f2ddf6 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sat, 6 Apr 2013 01:42:45 -0400
Subject: [PATCH] Implement efficient 'scm_unget_bytes' and use it.

* libguile/ports.c (scm_i_unget_bytes): New static function.
  (scm_unget_bytes): New API function.
  (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'.
  (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'.

* libguile/ports.h: Add prototype for 'scm_unget_bytes'.

* libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'.
---
 libguile/fports.c |    3 +-
 libguile/ports.c  |  130 ++++++++++++++++++++++++++++++++---------------------
 libguile/ports.h  |    1 +
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/libguile/fports.c b/libguile/fports.c
index f6c3c92..ffe4334 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
 
   if (ndrained > 0)
     /* Put DRAINED back to PORT.  */
-    while (ndrained-- > 0)
-      scm_unget_byte (drained[ndrained], port);
+    scm_unget_bytes ((unsigned char *) drained, ndrained, port);
 
   return SCM_UNSPECIFIED;
 }
diff --git a/libguile/ports.c b/libguile/ports.c
index 47dc165..9068c5c 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
 \f
 
 
-void 
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  size_t old_len, new_len;
 
   scm_i_clear_pending_eof (port);
-  if (pt->read_buf == pt->putback_buf)
-    /* already using the put-back buffer.  */
-    {
-      /* enlarge putback_buf if necessary.  */
-      if (pt->read_end == pt->read_buf + pt->read_buf_size
-	  && pt->read_buf == pt->read_pos)
-	{
-	  size_t new_size = pt->read_buf_size * 2;
-	  unsigned char *tmp = (unsigned char *)
-	    scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-			    "putback buffer");
-
-	  pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-	  pt->read_end = pt->read_buf + pt->read_buf_size;
-	  pt->read_buf_size = pt->putback_buf_size = new_size;
-	}
 
-      /* shift any existing bytes to buffer + 1.  */
-      if (pt->read_pos == pt->read_end)
-	pt->read_end = pt->read_buf + 1;
-      else if (pt->read_pos != pt->read_buf + 1)
-	{
-	  int count = pt->read_end - pt->read_pos;
-
-	  memmove (pt->read_buf + 1, pt->read_pos, count);
-	  pt->read_end = pt->read_buf + 1 + count;
-	}
-
-      pt->read_pos = pt->read_buf;
-    }
-  else
+  if (pt->read_buf != pt->putback_buf)
     /* switch to the put-back buffer.  */
     {
       if (pt->putback_buf == NULL)
 	{
+          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
 	  pt->putback_buf
 	    = (unsigned char *) scm_gc_malloc_pointerless
-	    (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-	  pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+	    (pt->putback_buf_size, "putback buffer");
 	}
 
       pt->saved_read_buf = pt->read_buf;
@@ -1842,12 +1815,59 @@ scm_unget_byte (int c, SCM port)
       pt->saved_read_end = pt->read_end;
       pt->saved_read_buf_size = pt->read_buf_size;
 
-      pt->read_pos = pt->read_buf = pt->putback_buf;
-      pt->read_end = pt->read_buf + 1;
+      /* Put read_pos at the end of the buffer, so that ungets will not
+         have to shift the buffer contents each time.  */
+      pt->read_buf = pt->putback_buf;
+      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
       pt->read_buf_size = pt->putback_buf_size;
     }
 
-  *pt->read_buf = c;
+  old_len = pt->read_end - pt->read_pos;
+  new_len = old_len + len;
+
+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */
+    {
+      size_t new_buf_size;
+      unsigned char *new_buf, *new_end, *new_pos;
+
+      new_buf_size = pt->read_buf_size * 2;
+      if (new_buf_size < new_len)
+        new_buf_size = new_len;
+
+      new_buf = (unsigned char *)
+        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+      /* Put the bytes at the end of the buffer, so that future
+         ungets won't need to shift the buffer.  */
+      new_end = new_buf + new_buf_size;
+      new_pos = new_end - old_len;
+      memcpy (new_pos, pt->read_pos, old_len);
+
+      pt->read_buf = pt->putback_buf = new_buf;
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+    }
+  else if (pt->read_buf + len < pt->read_pos)
+    /* If needed, shift the existing buffer contents up.
+       This should not happen unless some external code
+       manipulates the putback buffer pointers.  */
+    {
+      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+      unsigned char *new_pos = new_end - old_len;
+
+      memmove (new_pos, pt->read_pos, old_len);
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+    }
+
+  /* Move read_pos back and copy the bytes there.  */
+  pt->read_pos -= len;
+  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_flush (port);
 
   if (pt->rw_random)
     pt->rw_active = SCM_PORT_READ;
@@ -1855,6 +1875,21 @@ scm_unget_byte (int c, SCM port)
 #undef FUNC_NAME
 
 void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+  scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+  unsigned char byte;
+
+  byte = c;
+  scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
 scm_ungetc (scm_t_wchar c, SCM port)
 #define FUNC_NAME "scm_ungetc"
 {
@@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
   char result_buf[10];
   const char *encoding;
   size_t len;
-  int i;
 
   if (pt->encoding != NULL)
     encoding = pt->encoding;
@@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
 			"conversion to port encoding failed",
 			SCM_BOOL_F, SCM_MAKE_CHAR (c));
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (result[i], port);
+  scm_i_unget_bytes ((unsigned char *) result, len, port);
 
   if (SCM_UNLIKELY (result != result_buf))
     free (result);
@@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
   SCM result;
   scm_t_wchar c;
   char bytes[SCM_MBCHAR_BUF_SIZE];
-  long column, line, i;
+  long column, line;
   size_t len;
 
   if (SCM_UNBNDP (port))
@@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
 
   err = get_codepoint (port, &c, bytes, &len);
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (bytes[i], port);
+  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
 
   SCM_COL (port) = column;
   SCM_LINUM (port) = line;
@@ -2336,7 +2368,6 @@ static int
 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  int result;
   int i = 0;
 
   while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
@@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
       pt->read_pos++;
       i++;
     }
-
-  result = (i == len);
-
-  while (i > 0)
-    scm_unget_byte (bytes[--i], port);
-
-  return result;
+  scm_i_unget_bytes (bytes, i, port);
+  return (i == len);
 }
 
 static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
diff --git a/libguile/ports.h b/libguile/ports.h
index ca5bf2f..39317f8 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
 SCM_API void scm_flush (SCM port);
 SCM_API void scm_end_input (SCM port);
 SCM_API int scm_fill_input (SCM port);
+SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
 SCM_API void scm_unget_byte (int c, SCM port);
 SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
 SCM_API void scm_ungets (const char *s, int n, SCM port);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-04-07 14:25 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-06  6:28 [PATCH] Implement efficient 'scm_unget_bytes' and use it Mark H Weaver
2013-04-06  7:39 ` Chris K. Jester-Young
2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
2013-04-06 10:01   ` Mike Gran
2013-04-06 14:08     ` Mark H Weaver
2013-04-06 23:07   ` Ludovic Courtès
2013-04-07  7:19     ` Mark H Weaver
2013-04-07  9:28       ` Ludovic Courtès
2013-04-07 13:01         ` Mark H Weaver
2013-04-07 14:25           ` Ludovic Courtès

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).