unofficial mirror of bug-guile@gnu.org 
 help / color / mirror / Atom feed
From: Mark H Weaver <mhw@netris.org>
To: David Kastrup <dak@gnu.org>
Cc: 20302@debbugs.gnu.org
Subject: bug#20302: peek-char messes up file position on binary string ports
Date: Sun, 06 Sep 2015 07:55:02 -0400	[thread overview]
Message-ID: <87oahfoj8p.fsf@netris.org> (raw)
In-Reply-To: <87383z5nmj.fsf@netris.org> (Mark H. Weaver's message of "Fri, 17 Apr 2015 01:29:08 -0400")

[-- Attachment #1: Type: text/plain, Size: 945 bytes --]

Mark H Weaver <mhw@netris.org> writes:

> David Kastrup <dak@gnu.org> writes:
>
>> (use-modules (rnrs bytevectors) (rnrs io ports))
>> (let ((port (open-bytevector-input-port
>> 	     (string->utf8 "Blablabla\nBlablabla\n"))))
>>   (seek port 13 SEEK_SET)
>>   (format #t "~c ~d\n" (peek-char port)
>> 	  (ftell port)))
>> ;; Outputs b 3 but should output b 13
>>
>> This is using
>> guile (GNU Guile) 2.0.11
>> Packaged by Debian (2.0.11-deb+1-1)
>
> Ouch :-(
>
> The problem is that r6rs-ports.c:bip_seek assumes that
> c_port->read_{buf,pos,end} point to the original bytevector, and fail to
> handle the case where it points to a "putback" buffer.
>
> Note that (ftell port) is equivalent to (seek port 0 SEEK_CUR).

I've attached a preliminary patch set to fix this bug and some others.
I'm going to hold off on pushing them to stable-2.0 until I've added
tests and convinced myself that I haven't introduced any new problems.

     Mark



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: [PATCH 1/3] build: Add SCM_T_OFF_MAX and SCM_T_OFF_MIN to scmconfig.h --]
[-- Type: text/x-patch, Size: 1191 bytes --]

From 1d398a5ee910f3edf17b8b86e29a7fbe967071ec Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sun, 6 Sep 2015 07:33:55 -0400
Subject: [PATCH 1/3] build: Add SCM_T_OFF_MAX and SCM_T_OFF_MIN to
 scmconfig.h.

* libguile/gen-scmconfig.c (main): Add SCM_T_OFF_MAX and SCM_T_OFF_MIN
  to the generated 'scmconfig.h' file.
---
 libguile/gen-scmconfig.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libguile/gen-scmconfig.c b/libguile/gen-scmconfig.c
index 2f6fa6e..e433bd1 100644
--- a/libguile/gen-scmconfig.c
+++ b/libguile/gen-scmconfig.c
@@ -376,10 +376,16 @@ main (int argc, char *argv[])
 
 #if defined GUILE_USE_64_CALLS && defined HAVE_STAT64
   pf ("typedef scm_t_int64 scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX SCM_T_INT64_MAX\n");
+  pf ("#define SCM_T_OFF_MIN SCM_T_INT64_MIN\n");
 #elif SIZEOF_OFF_T == SIZEOF_INT
   pf ("typedef int scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX INT_MAX\n");
+  pf ("#define SCM_T_OFF_MIN INT_MIN\n");
 #else
   pf ("typedef long int scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX LONG_MAX\n");
+  pf ("#define SCM_T_OFF_MIN LONG_MIN\n");
 #endif
 
   pf ("/* Define to 1 if the compiler supports the "
-- 
2.5.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: [PATCH 2/3] PRELIMINARY: Fix seeking on binary input ports with putback buffers --]
[-- Type: text/x-patch, Size: 3998 bytes --]

From 53a6a5f7a66ac1f7b92a7347dc9486db14c73df5 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sun, 6 Sep 2015 07:35:58 -0400
Subject: [PATCH 2/3] PRELIMINARY: Fix seeking on binary input ports with
 putback buffers.

Fixes <http://bugs.gnu.org/20302>.
Reported by David Kastrup <dak@gnu.org>.

* libguile/r6rs-ports.c (bip_end_input): New static function.
  (initialize_bytevector_input_ports): Register it.
  (bip_seek): Rewrite to handle putback buffers, based on st_seek from
  strports.c.
---
 libguile/r6rs-ports.c | 83 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index a17b7b4..1bf766c 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -125,45 +125,69 @@ bip_fill_input (SCM port)
   return result;
 }
 
+static void
+bip_end_input (SCM port, int offset)
+{
+  scm_t_port *c_port = SCM_PTAB_ENTRY (port);
+  
+  if (c_port->read_pos - c_port->read_buf < offset)
+    scm_misc_error ("bip_end_input", "negative position", SCM_EOL);
+
+  c_port->read_pos -= offset;
+}
+
 static scm_t_off
 bip_seek (SCM port, scm_t_off offset, int whence)
 #define FUNC_NAME "bip_seek"
 {
-  scm_t_off c_result = 0;
   scm_t_port *c_port = SCM_PTAB_ENTRY (port);
+  scm_t_off target;
 
-  switch (whence)
+  if (offset == 0 && whence == SEEK_CUR)
+    /* special case to avoid disturbing the putback buffer.  */
     {
-    case SEEK_CUR:
-      offset += c_port->read_pos - c_port->read_buf;
-      /* Fall through.  */
-
-    case SEEK_SET:
-      if (c_port->read_buf + offset <= c_port->read_end)
-	{
-	  c_port->read_pos = c_port->read_buf + offset;
-	  c_result = offset;
-	}
+      if (c_port->read_buf == c_port->putback_buf)
+        target = c_port->saved_read_pos - c_port->saved_read_buf
+          - (c_port->read_end - c_port->read_pos);
       else
-	scm_out_of_range (FUNC_NAME, scm_from_int (offset));
-      break;
+        target = c_port->read_pos - c_port->read_buf;
+    }
+  else
+    {
+      scm_t_off base = 0;
 
-    case SEEK_END:
-      if (c_port->read_end - offset >= c_port->read_buf)
-	{
-	  c_port->read_pos = c_port->read_end - offset;
-	  c_result = c_port->read_pos - c_port->read_buf;
-	}
-      else
-	scm_out_of_range (FUNC_NAME, scm_from_int (offset));
-      break;
+      /* If the putback buffer is currently active, this will dump its
+         contents, switch back to the main read buffer, and move
+         read_pos backwards as needed to account for the bytes that were
+         put back. */
+      if (c_port->read_buf == c_port->putback_buf)
+        scm_end_input (port);
 
-    default:
-      scm_wrong_type_arg_msg (FUNC_NAME, 0, port,
-			      "invalid `seek' parameter");
-    }
+      switch (whence)
+        {
+        case SEEK_CUR:
+          base = c_port->read_pos - c_port->read_buf;
+          break;
+        case SEEK_END:
+          base = c_port->read_end - c_port->read_buf;
+          break;
+        case SEEK_SET:
+          base = 0;
+          break;
+        default:
+          scm_wrong_type_arg_msg (FUNC_NAME, 0, port,
+                                  "invalid `whence' argument");
+        }
 
-  return c_result;
+      if (offset > SCM_T_OFF_MAX - base)  /* Overflow check */
+        scm_out_of_range (FUNC_NAME, scm_from_int (offset));
+      target = base + offset;
+      if (target < 0 || target > c_port->read_end - c_port->read_buf)
+        scm_out_of_range (FUNC_NAME, scm_from_int (offset));
+
+      c_port->read_pos = c_port->read_buf + target;
+    }
+  return target;
 }
 #undef FUNC_NAME
 
@@ -176,7 +200,8 @@ initialize_bytevector_input_ports (void)
     scm_make_port_type ("r6rs-bytevector-input-port", bip_fill_input,
 			NULL);
 
-  scm_set_port_seek (bytevector_input_port_type, bip_seek);
+  scm_set_port_end_input (bytevector_input_port_type, bip_end_input);
+  scm_set_port_seek      (bytevector_input_port_type, bip_seek);
 }
 
 
-- 
2.5.0


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: [PATCH 3/3] PRELIMINARY: string ports: Add overflow checks and other fixes --]
[-- Type: text/x-patch, Size: 4630 bytes --]

From dc09359733feb68590c905d77e2172ec6e3781d0 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Sun, 6 Sep 2015 07:42:07 -0400
Subject: [PATCH 3/3] PRELIMINARY: string ports: Add overflow checks and other
 fixes.

* libguile/strports.c (st_resize_port): Check that 'new_size' fits in a
  size_t.
  (st_end_input): Improve code clarity.
  (st_seek): Check for overflow during computation of target position.
  Check for invalid 'whence' argument.  Resize the port when seeking to
  a position beyond the end of the buffer.  Check for overflow during
  computation of new buffer size when resizing the port.
---
 libguile/strports.c | 64 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/libguile/strports.c b/libguile/strports.c
index f306019..18f1970 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -103,28 +103,33 @@ stfill_buffer (SCM port)
 static void
 st_resize_port (scm_t_port *pt, scm_t_off new_size)
 {
-  SCM old_stream = SCM_PACK (pt->stream);
-  const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
-  SCM new_stream = scm_c_make_bytevector (new_size);
-  signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
-  unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
-  unsigned long int min_size = min (old_size, new_size);
+  if (new_size < 0 || new_size > SCM_I_SIZE_MAX)
+    scm_misc_error ("st_resize_port", "new_size overflow", SCM_EOL);
 
-  scm_t_off index = pt->write_pos - pt->write_buf;
+  {
+    SCM old_stream = SCM_PACK (pt->stream);
+    const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
+    SCM new_stream = scm_c_make_bytevector (new_size);
+    signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
+    unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
+    unsigned long int min_size = min (old_size, new_size);
 
-  pt->write_buf_size = new_size;
+    scm_t_off index = pt->write_pos - pt->write_buf;
 
-  memcpy (dst, src, min_size);
+    pt->write_buf_size = new_size;
 
-  scm_remember_upto_here_1 (old_stream);
+    memcpy (dst, src, min_size);
 
-  /* reset buffer. */
-  {
-    pt->stream = SCM_UNPACK (new_stream);
-    pt->read_buf = pt->write_buf = (unsigned char *)dst;
-    pt->read_pos = pt->write_pos = pt->write_buf + index;
-    pt->write_end = pt->write_buf + pt->write_buf_size;
-    pt->read_end = pt->read_buf + pt->read_buf_size;
+    scm_remember_upto_here_1 (old_stream);
+
+    /* reset buffer. */
+    {
+      pt->stream = SCM_UNPACK (new_stream);
+      pt->read_buf = pt->write_buf = (unsigned char *)dst;
+      pt->read_pos = pt->write_pos = pt->write_buf + index;
+      pt->write_end = pt->write_buf + pt->write_buf_size;
+      pt->read_end = pt->read_buf + pt->read_buf_size;
+    }
   }
 }
 
@@ -176,7 +181,8 @@ st_end_input (SCM port, int offset)
   if (pt->read_pos - pt->read_buf < offset)
     scm_misc_error ("st_end_input", "negative position", SCM_EOL);
 
-  pt->write_pos = (unsigned char *) (pt->read_pos = pt->read_pos - offset);
+  pt->read_pos -= offset;
+  pt->write_pos = (unsigned char *) pt->read_pos;
   pt->rw_active = SCM_PORT_NEITHER;
 }
 
@@ -202,6 +208,8 @@ st_seek (SCM port, scm_t_off offset, int whence)
   else
     /* all other cases.  */
     {
+      scm_t_off base = 0;
+
       if (pt->rw_active == SCM_PORT_WRITE)
 	st_flush (port);
   
@@ -211,18 +219,24 @@ st_seek (SCM port, scm_t_off offset, int whence)
       switch (whence)
 	{
 	case SEEK_CUR:
-	  target = pt->read_pos - pt->read_buf + offset;
+	  base = pt->read_pos - pt->read_buf;
 	  break;
 	case SEEK_END:
-	  target = pt->read_end - pt->read_buf + offset;
+	  base = pt->read_end - pt->read_buf;
 	  break;
-	default: /* SEEK_SET */
-	  target = offset;
+	case SEEK_SET:
+	  base = 0;
 	  break;
+	default:
+	  scm_wrong_type_arg_msg ("st_seek", 0, port,
+				  "invalid `whence' argument");
 	}
 
+      if (offset > SCM_T_OFF_MAX - base)
+	scm_misc_error ("st_seek", "target would overflow", SCM_EOL);
+      target = base + offset;
       if (target < 0)
-	scm_misc_error ("st_seek", "negative offset", SCM_EOL);
+	scm_misc_error ("st_seek", "negative target", SCM_EOL);
   
       if (target >= pt->write_buf_size)
 	{
@@ -235,7 +249,9 @@ st_seek (SCM port, scm_t_off offset, int whence)
 				  SCM_EOL);
 		}
 	    }
-	  else if (target == pt->write_buf_size)
+	  else if (target > SCM_T_OFF_MAX - target)
+	    scm_misc_error ("st_seek", "target * 2 would overflow", SCM_EOL);
+	  else
 	    st_resize_port (pt, target * 2);
 	}
       pt->read_pos = pt->write_pos = pt->read_buf + target;
-- 
2.5.0


  reply	other threads:[~2015-09-06 11:55 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-11 11:48 bug#20302: peek-char messes up file position on binary string ports David Kastrup
2015-04-17  5:29 ` Mark H Weaver
2015-09-06 11:55   ` Mark H Weaver [this message]
2015-11-04 21:12     ` Mark H Weaver
     [not found] ` <handler.20302.B.142875290514957.ack@debbugs.gnu.org>
2015-08-31  9:09   ` bug#20302: Acknowledgement (peek-char messes up file position on binary string ports) David Kastrup

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87oahfoj8p.fsf@netris.org \
    --to=mhw@netris.org \
    --cc=20302@debbugs.gnu.org \
    --cc=dak@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).