unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
From: Mark H Weaver <mhw@netris.org>
To: guile-devel@gnu.org
Subject: [PATCH] Implement efficient 'scm_unget_bytes' and use it
Date: Sat, 06 Apr 2013 02:28:14 -0400	[thread overview]
Message-ID: <87d2u8uq9t.fsf@tines.lan> (raw)

[-- Attachment #1: Type: text/plain, Size: 608 bytes --]

Hello all,

I discovered that 'scm_unget_byte' is kind of dumb.  It puts the bytes
at the beginning of the pushback buffer instead of the end.  This means
that every time you unget a byte, it has to shift up the existing
contents of the buffer, so ungetting N bytes takes O(N^2) time.

This patch implements a function 'scm_unget_bytes' that enables large
buffers to be unread efficiently.  It keeps the bytes at the end of the
buffer instead of the beginning, but it can cope if some external code
manipulates the pushback buffer by hand and puts the bytes at the
beginning.

What do you think?

    Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH] Implement efficient 'scm_unget_bytes' and use it --]
[-- Type: text/x-diff, Size: 8094 bytes --]

From 58c5bda77b29539a805aaa503c7dad9cd5f2ddf6 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sat, 6 Apr 2013 01:42:45 -0400
Subject: [PATCH] Implement efficient 'scm_unget_bytes' and use it.

* libguile/ports.c (scm_i_unget_bytes): New static function.
  (scm_unget_bytes): New API function.
  (scm_unget_byte): Rewrite to simply call 'scm_i_unget_bytes'.
  (scm_ungetc, scm_peek_char, looking_at_bytes): Use 'scm_i_unget_bytes'.

* libguile/ports.h: Add prototype for 'scm_unget_bytes'.

* libguile/fports.c (scm_setvbuf): Use 'scm_unget_bytes'.
---
 libguile/fports.c |    3 +-
 libguile/ports.c  |  130 ++++++++++++++++++++++++++++++++---------------------
 libguile/ports.h  |    1 +
 3 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/libguile/fports.c b/libguile/fports.c
index f6c3c92..ffe4334 100644
--- a/libguile/fports.c
+++ b/libguile/fports.c
@@ -225,8 +225,7 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
 
   if (ndrained > 0)
     /* Put DRAINED back to PORT.  */
-    while (ndrained-- > 0)
-      scm_unget_byte (drained[ndrained], port);
+    scm_unget_bytes ((unsigned char *) drained, ndrained, port);
 
   return SCM_UNSPECIFIED;
 }
diff --git a/libguile/ports.c b/libguile/ports.c
index 47dc165..9068c5c 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1789,52 +1789,25 @@ scm_end_input (SCM port)
 \f
 
 
-void 
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  size_t old_len, new_len;
 
   scm_i_clear_pending_eof (port);
-  if (pt->read_buf == pt->putback_buf)
-    /* already using the put-back buffer.  */
-    {
-      /* enlarge putback_buf if necessary.  */
-      if (pt->read_end == pt->read_buf + pt->read_buf_size
-	  && pt->read_buf == pt->read_pos)
-	{
-	  size_t new_size = pt->read_buf_size * 2;
-	  unsigned char *tmp = (unsigned char *)
-	    scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-			    "putback buffer");
-
-	  pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-	  pt->read_end = pt->read_buf + pt->read_buf_size;
-	  pt->read_buf_size = pt->putback_buf_size = new_size;
-	}
 
-      /* shift any existing bytes to buffer + 1.  */
-      if (pt->read_pos == pt->read_end)
-	pt->read_end = pt->read_buf + 1;
-      else if (pt->read_pos != pt->read_buf + 1)
-	{
-	  int count = pt->read_end - pt->read_pos;
-
-	  memmove (pt->read_buf + 1, pt->read_pos, count);
-	  pt->read_end = pt->read_buf + 1 + count;
-	}
-
-      pt->read_pos = pt->read_buf;
-    }
-  else
+  if (pt->read_buf != pt->putback_buf)
     /* switch to the put-back buffer.  */
     {
       if (pt->putback_buf == NULL)
 	{
+          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
 	  pt->putback_buf
 	    = (unsigned char *) scm_gc_malloc_pointerless
-	    (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-	  pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+	    (pt->putback_buf_size, "putback buffer");
 	}
 
       pt->saved_read_buf = pt->read_buf;
@@ -1842,12 +1815,59 @@ scm_unget_byte (int c, SCM port)
       pt->saved_read_end = pt->read_end;
       pt->saved_read_buf_size = pt->read_buf_size;
 
-      pt->read_pos = pt->read_buf = pt->putback_buf;
-      pt->read_end = pt->read_buf + 1;
+      /* Put read_pos at the end of the buffer, so that ungets will not
+         have to shift the buffer contents each time.  */
+      pt->read_buf = pt->putback_buf;
+      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
       pt->read_buf_size = pt->putback_buf_size;
     }
 
-  *pt->read_buf = c;
+  old_len = pt->read_end - pt->read_pos;
+  new_len = old_len + len;
+
+  if (new_len > pt->read_buf_size)
+    /* The putback buffer needs to be enlarged.  */
+    {
+      size_t new_buf_size;
+      unsigned char *new_buf, *new_end, *new_pos;
+
+      new_buf_size = pt->read_buf_size * 2;
+      if (new_buf_size < new_len)
+        new_buf_size = new_len;
+
+      new_buf = (unsigned char *)
+        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+      /* Put the bytes at the end of the buffer, so that future
+         ungets won't need to shift the buffer.  */
+      new_end = new_buf + new_buf_size;
+      new_pos = new_end - old_len;
+      memcpy (new_pos, pt->read_pos, old_len);
+
+      pt->read_buf = pt->putback_buf = new_buf;
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+    }
+  else if (pt->read_buf + len < pt->read_pos)
+    /* If needed, shift the existing buffer contents up.
+       This should not happen unless some external code
+       manipulates the putback buffer pointers.  */
+    {
+      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+      unsigned char *new_pos = new_end - old_len;
+
+      memmove (new_pos, pt->read_pos, old_len);
+      pt->read_pos = new_pos;
+      pt->read_end = new_end;
+    }
+
+  /* Move read_pos back and copy the bytes there.  */
+  pt->read_pos -= len;
+  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+  if (pt->rw_active == SCM_PORT_WRITE)
+    scm_flush (port);
 
   if (pt->rw_random)
     pt->rw_active = SCM_PORT_READ;
@@ -1855,6 +1875,21 @@ scm_unget_byte (int c, SCM port)
 #undef FUNC_NAME
 
 void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+  scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+  unsigned char byte;
+
+  byte = c;
+  scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
 scm_ungetc (scm_t_wchar c, SCM port)
 #define FUNC_NAME "scm_ungetc"
 {
@@ -1863,7 +1898,6 @@ scm_ungetc (scm_t_wchar c, SCM port)
   char result_buf[10];
   const char *encoding;
   size_t len;
-  int i;
 
   if (pt->encoding != NULL)
     encoding = pt->encoding;
@@ -1881,8 +1915,7 @@ scm_ungetc (scm_t_wchar c, SCM port)
 			"conversion to port encoding failed",
 			SCM_BOOL_F, SCM_MAKE_CHAR (c));
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (result[i], port);
+  scm_i_unget_bytes ((unsigned char *) result, len, port);
 
   if (SCM_UNLIKELY (result != result_buf))
     free (result);
@@ -1941,7 +1974,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
   SCM result;
   scm_t_wchar c;
   char bytes[SCM_MBCHAR_BUF_SIZE];
-  long column, line, i;
+  long column, line;
   size_t len;
 
   if (SCM_UNBNDP (port))
@@ -1953,8 +1986,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
 
   err = get_codepoint (port, &c, bytes, &len);
 
-  for (i = len - 1; i >= 0; i--)
-    scm_unget_byte (bytes[i], port);
+  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
 
   SCM_COL (port) = column;
   SCM_LINUM (port) = line;
@@ -2336,7 +2368,6 @@ static int
 looking_at_bytes (SCM port, const unsigned char *bytes, int len)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  int result;
   int i = 0;
 
   while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
@@ -2344,13 +2375,8 @@ looking_at_bytes (SCM port, const unsigned char *bytes, int len)
       pt->read_pos++;
       i++;
     }
-
-  result = (i == len);
-
-  while (i > 0)
-    scm_unget_byte (bytes[--i], port);
-
-  return result;
+  scm_i_unget_bytes (bytes, i, port);
+  return (i == len);
 }
 
 static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
diff --git a/libguile/ports.h b/libguile/ports.h
index ca5bf2f..39317f8 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -302,6 +302,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
 SCM_API void scm_flush (SCM port);
 SCM_API void scm_end_input (SCM port);
 SCM_API int scm_fill_input (SCM port);
+SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
 SCM_API void scm_unget_byte (int c, SCM port);
 SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
 SCM_API void scm_ungets (const char *s, int n, SCM port);
-- 
1.7.10.4


             reply	other threads:[~2013-04-06  6:28 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-06  6:28 Mark H Weaver [this message]
2013-04-06  7:39 ` [PATCH] Implement efficient 'scm_unget_bytes' and use it Chris K. Jester-Young
2013-04-06  7:47 ` [PATCH] Implement efficient 'scm_unget_bytes' and 'unget-bytevector' Mark H Weaver
2013-04-06 10:01   ` Mike Gran
2013-04-06 14:08     ` Mark H Weaver
2013-04-06 23:07   ` Ludovic Courtès
2013-04-07  7:19     ` Mark H Weaver
2013-04-07  9:28       ` Ludovic Courtès
2013-04-07 13:01         ` Mark H Weaver
2013-04-07 14:25           ` Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87d2u8uq9t.fsf@tines.lan \
    --to=mhw@netris.org \
    --cc=guile-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).