From 6a3feb79b289410a62d2e0c8a70e0ea59d0cf8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Tue, 21 Jan 2014 02:28:35 -0500 Subject: [PATCH 2/2] Custom binary input ports support 'setvbuf'. Modified-by: Mark H Weaver * libguile/r6rs-ports.c (CBIP_BUFFER_SIZE): Adjust comment. Set to 8KiB. (SCM_SET_CBIP_BYTEVECTOR): New macro. (cbip_setvbuf): New function. (cbip_fill_input): Check whether PORT is buffered. When unbuffered, check whether BV can hold C_REQUESTED bytes, and allocate a new bytevector if not; copy the data back from BV to c_port->read_pos. Remove 'again' label, and don't loop there. (initialize_custom_binary_input_ports): Set PTOB's 'setvbuf' method. * test-suite/tests/r6rs-ports.test ("7.2.7 Input Ports")["custom binary input port unbuffered & 'port-position'", "custom binary input port unbuffered & 'read!' calls", "custom binary input port, unbuffered then buffered", "custom binary input port, buffered then unbuffered"]: New tests. * doc/ref/api-io.texi (R6RS Binary Input): Document the buffering of custom binary input ports, and link to 'setvbuf'. --- doc/ref/api-io.texi | 4 + libguile/r6rs-ports.c | 89 +++++++++++++++++++++++----- test-suite/tests/r6rs-ports.test | 123 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 15 deletions(-) diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi index edf38be..49e2a05 100644 --- a/doc/ref/api-io.texi +++ b/doc/ref/api-io.texi @@ -1792,6 +1792,10 @@ indicating the position of the next byte is to read. Finally, if @var{close} is not @code{#f}, it must be a thunk. It is invoked when the custom binary input port is closed. +The returned port is fully buffered by default, but its buffering mode +can be changed using @code{setvbuf} (@pxref{Ports and File Descriptors, +@code{setvbuf}}). + Using a custom binary input port, the @code{open-bytevector-input-port} procedure could be implemented as follows: diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c index a8fc3f1..5b8da1d 100644 --- a/libguile/r6rs-ports.c +++ b/libguile/r6rs-ports.c @@ -272,18 +272,59 @@ cbp_close (SCM port) static scm_t_bits custom_binary_input_port_type = 0; -/* Size of the buffer embedded in custom binary input ports. */ -#define CBIP_BUFFER_SIZE 4096 +/* Initial size of the buffer embedded in custom binary input ports. */ +#define CBIP_BUFFER_SIZE 8192 /* Return the bytevector associated with PORT. */ #define SCM_CBIP_BYTEVECTOR(_port) \ SCM_SIMPLE_VECTOR_REF (SCM_PACK (SCM_STREAM (_port)), 4) +/* Set BV as the bytevector associated with PORT. */ +#define SCM_SET_CBIP_BYTEVECTOR(_port, _bv) \ + SCM_SIMPLE_VECTOR_SET (SCM_PACK (SCM_STREAM (_port)), 4, (_bv)) + /* Return the various procedures of PORT. */ #define SCM_CBIP_READ_PROC(_port) \ SCM_SIMPLE_VECTOR_REF (SCM_PACK (SCM_STREAM (_port)), 0) +/* Set PORT's internal buffer according to READ_SIZE. */ +static void +cbip_setvbuf (SCM port, long read_size, long write_size) +{ + SCM bv; + scm_t_port *pt; + + pt = SCM_PTAB_ENTRY (port); + bv = SCM_CBIP_BYTEVECTOR (port); + + switch (read_size) + { + case 0: + /* Unbuffered: keep PORT's bytevector as is (it will be used in + future 'scm_c_read' calls), but point to the one-byte buffer. */ + pt->read_buf = &pt->shortbuf; + pt->read_buf_size = 1; + break; + + case -1: + /* Preferred size: keep the current bytevector and use it as the + backing store. */ + pt->read_buf = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv); + pt->read_buf_size = SCM_BYTEVECTOR_LENGTH (bv); + break; + + default: + /* Fully buffered: allocate a buffer of READ_SIZE bytes. */ + bv = scm_c_make_bytevector (read_size); + SCM_SET_CBIP_BYTEVECTOR (port, bv); + pt->read_buf = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv); + pt->read_buf_size = read_size; + } + + pt->read_pos = pt->read_end = pt->read_buf; +} + static inline SCM make_cbip (SCM read_proc, SCM get_position_proc, SCM set_position_proc, SCM close_proc) @@ -330,26 +371,39 @@ cbip_fill_input (SCM port) int result; scm_t_port *c_port = SCM_PTAB_ENTRY (port); - again: if (c_port->read_pos >= c_port->read_end) { /* Invoke the user's `read!' procedure. */ + int buffered; size_t c_octets, c_requested; SCM bv, read_proc, octets; c_requested = c_port->read_buf_size; + read_proc = SCM_CBIP_READ_PROC (port); - /* Use the bytevector associated with PORT as the buffer passed to the - `read!' procedure, thereby avoiding additional allocations. */ bv = SCM_CBIP_BYTEVECTOR (port); - read_proc = SCM_CBIP_READ_PROC (port); + buffered = + (c_port->read_buf == (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv)); - /* The assumption here is that C_PORT's internal buffer wasn't changed - behind our back. */ - assert (c_port->read_buf == - (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv)); - assert ((unsigned) c_port->read_buf_size - == SCM_BYTEVECTOR_LENGTH (bv)); + if (buffered) + /* Make sure the buffer isn't corrupt. BV can be passed directly + to READ_PROC. */ + assert (c_port->read_buf_size == SCM_BYTEVECTOR_LENGTH (bv)); + else + { + /* This is an unbuffered port. When called via the + 'get-bytevector-*' procedures, and thus via 'scm_c_read', we + are passed the caller-provided buffer, so we need to check its + size. */ + if (SCM_BYTEVECTOR_LENGTH (bv) < c_requested) + { + /* Bad luck: we have to make another allocation. Save that + bytevector for later reuse, in the hope that the application + has regular access patterns. */ + bv = scm_c_make_bytevector (c_requested); + SCM_SET_CBIP_BYTEVECTOR (port, bv); + } + } octets = scm_call_3 (read_proc, bv, SCM_INUM0, scm_from_size_t (c_requested)); @@ -357,11 +411,15 @@ cbip_fill_input (SCM port) if (SCM_UNLIKELY (c_octets > c_requested)) scm_out_of_range (FUNC_NAME, octets); - c_port->read_pos = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv); + if (!buffered) + /* Copy the data back to the internal buffer. */ + memcpy ((char *) c_port->read_pos, SCM_BYTEVECTOR_CONTENTS (bv), + c_octets); + c_port->read_end = (unsigned char *) c_port->read_pos + c_octets; - if (c_octets > 0) - goto again; + if (c_octets != 0 || c_requested == 0) + result = (int) *c_port->read_pos; else result = EOF; } @@ -410,6 +468,7 @@ initialize_custom_binary_input_ports (void) scm_set_port_seek (custom_binary_input_port_type, cbp_seek); scm_set_port_close (custom_binary_input_port_type, cbp_close); + scm_set_port_setvbuf (custom_binary_input_port_type, cbip_setvbuf); } diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test index 4bd8a70..339679f 100644 --- a/test-suite/tests/r6rs-ports.test +++ b/test-suite/tests/r6rs-ports.test @@ -456,6 +456,129 @@ not `set-port-position!'" (u8-list->bytevector (map char->integer (string->list "Port!"))))))) + (pass-if-equal "custom binary input port unbuffered & 'port-position'" + '(0 2 5 11) + ;; Check that the value returned by 'port-position' is correct, and + ;; that each 'port-position' call leads one call to the + ;; 'get-position' method. + (let* ((str "Hello Port!") + (output (make-bytevector (string-length str))) + (source (with-fluids ((%default-port-encoding "UTF-8")) + (open-string-input-port str))) + (read! (lambda (bv start count) + (let ((r (get-bytevector-n! source bv start count))) + (if (eof-object? r) + 0 + r)))) + (pos '()) + (get-pos (lambda () + (let ((p (port-position source))) + (set! pos (cons p pos)) + p))) + (port (make-custom-binary-input-port "the port" read! + get-pos #f #f))) + (setvbuf port _IONBF) + (and (= 0 (port-position port)) + (begin + (get-bytevector-n! port output 0 2) + (= 2 (port-position port))) + (begin + (get-bytevector-n! port output 2 3) + (= 5 (port-position port))) + (let ((bv (string->utf8 (get-string-all port)))) + (bytevector-copy! bv 0 output 5 (bytevector-length bv)) + (= (string-length str) (port-position port))) + (bytevector=? output (string->utf8 str)) + (reverse pos)))) + + (pass-if-equal "custom binary input port unbuffered & 'read!' calls" + `((2 "He") (3 "llo") (42 " Port!")) + (let* ((str "Hello Port!") + (source (with-fluids ((%default-port-encoding "UTF-8")) + (open-string-input-port str))) + (reads '()) + (read! (lambda (bv start count) + (set! reads (cons count reads)) + (let ((r (get-bytevector-n! source bv start count))) + (if (eof-object? r) + 0 + r)))) + (port (make-custom-binary-input-port "the port" read! + #f #f #f))) + + (setvbuf port _IONBF) + (let ((ret (list (get-bytevector-n port 2) + (get-bytevector-n port 3) + (get-bytevector-n port 42)))) + (zip (reverse reads) + (map (lambda (obj) + (if (bytevector? obj) + (utf8->string obj) + obj)) + ret))))) + + (pass-if-equal "custom binary input port, unbuffered then buffered" + `((6 "Lorem ") (12 "ipsum dolor ") (777 "sit amet, consectetur…") + (777 ,(eof-object))) + (let* ((str "Lorem ipsum dolor sit amet, consectetur…") + (source (with-fluids ((%default-port-encoding "UTF-8")) + (open-string-input-port str))) + (reads '()) + (read! (lambda (bv start count) + (set! reads (cons count reads)) + (let ((r (get-bytevector-n! source bv start count))) + (if (eof-object? r) + 0 + r)))) + (port (make-custom-binary-input-port "the port" read! + #f #f #f))) + + (setvbuf port _IONBF) + (let ((ret (list (get-bytevector-n port 6) + (get-bytevector-n port 12) + (begin + (setvbuf port _IOFBF 777) + (get-bytevector-n port 42)) + (get-bytevector-n port 42)))) + (zip (reverse reads) + (map (lambda (obj) + (if (bytevector? obj) + (utf8->string obj) + obj)) + ret))))) + + (pass-if-equal "custom binary input port, buffered then unbuffered" + `((18 + 42 14 ; scm_c_read tries to fill the 42-byte buffer + 42) + ("Lorem " "ipsum dolor " "sit amet, consectetur bla…" ,(eof-object))) + (let* ((str "Lorem ipsum dolor sit amet, consectetur bla…") + (source (with-fluids ((%default-port-encoding "UTF-8")) + (open-string-input-port str))) + (reads '()) + (read! (lambda (bv start count) + (set! reads (cons count reads)) + (let ((r (get-bytevector-n! source bv start count))) + (if (eof-object? r) + 0 + r)))) + (port (make-custom-binary-input-port "the port" read! + #f #f #f))) + + (setvbuf port _IOFBF 18) + (let ((ret (list (get-bytevector-n port 6) + (get-bytevector-n port 12) + (begin + (setvbuf port _IONBF) + (get-bytevector-n port 42)) + (get-bytevector-n port 42)))) + (list (reverse reads) + (map (lambda (obj) + (if (bytevector? obj) + (utf8->string obj) + obj)) + ret))))) + (pass-if "custom binary input port `close-proc' is called" (let* ((closed? #f) (read! (lambda (bv start count) 0)) -- 1.7.5.4