--- orig/ChangeLog
+++ mod/ChangeLog
@@ -1,3 +1,7 @@
+2007-07-22  Ludovic Courtès  <ludo@gnu.org>
+
+	* configure.in: Check for <strings.h> and `strncasecmp ()'.
+
 2007-07-19  Ludovic Courtès  <ludo@gnu.org>
 
 	* NEWS: Mention `(ice-9 i18n)' and lazy duplicate binding


--- orig/configure.in
+++ mod/configure.in
@@ -546,7 +546,7 @@
 regex.h rxposix.h rx/rxposix.h sys/dir.h sys/ioctl.h sys/select.h \
 sys/time.h sys/timeb.h sys/times.h sys/stdtypes.h sys/types.h \
 sys/utime.h time.h unistd.h utime.h pwd.h grp.h sys/utsname.h \
-direct.h langinfo.h nl_types.h])
+strings.h direct.h langinfo.h nl_types.h])
 
 # "complex double" is new in C99, and "complex" is only a keyword if
 # <complex.h> is included
@@ -638,7 +638,7 @@
 #   strcoll_l, newlocale - GNU extensions (glibc), also available on Darwin
 #   nl_langinfo - X/Open, not available on Windows.
 #
-AC_CHECK_FUNCS([DINFINITY DQNAN chsize clog10 ctermid fesetround ftime ftruncate fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice pipe _pipe readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex truncate unsetenv isblank _NSGetEnviron strcoll strcoll_l newlocale nl_langinfo])
+AC_CHECK_FUNCS([DINFINITY DQNAN chsize clog10 ctermid fesetround ftime ftruncate fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice pipe _pipe readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex truncate unsetenv isblank _NSGetEnviron strncasecmp strcoll strcoll_l newlocale nl_langinfo])
 
 # Reasons for testing:
 #   netdb.h - not in mingw


--- orig/libguile/ChangeLog
+++ mod/libguile/ChangeLog
@@ -1,3 +1,18 @@
+2007-07-22  Ludovic Courtès  <ludo@gnu.org>
+
+	Overhauled the reader, making it faster.
+
+	* gdbint.c (tok_buf, tok_buf_mark_p): Removed.
+	(gdb_read): Don't use a token buffer.  Use `scm_read ()' instead
+	of `scm_lreadr ()'.
+
+	* read.c: Overhauled.  No longer use a token buffer.  Use a
+	on-stack C buffer in the common case and use Scheme strings when
+	larger buffers are needed.
+	* read.h (scm_grow_tok_buf, scm_flush_ws, scm_casei_streq,
+	scm_lreadr, scm_lreadrecparen): Removed.
+	(scm_i_input_error): Marked as `SCM_NORETURN'.
+
 2007-07-15  Ludovic Courtès  <ludo@gnu.org>
 
 	* script.c (scm_compile_shell_switches): Updated copyright year.


--- orig/libguile/gdbint.c
+++ mod/libguile/gdbint.c
@@ -103,9 +103,6 @@
 static SCM gdb_input_port;
 static int port_mark_p, stream_mark_p, string_mark_p;
 
-static SCM tok_buf;
-static int tok_buf_mark_p;
-
 static SCM gdb_output_port;
 
 
@@ -184,10 +181,9 @@
   scm_puts (str, gdb_input_port);
   scm_truncate_file (gdb_input_port, SCM_UNDEFINED);
   scm_seek (gdb_input_port, SCM_INUM0, scm_from_int (SEEK_SET));
+
   /* Read one object */
-  tok_buf_mark_p = SCM_GC_MARK_P (tok_buf);
-  SCM_CLEAR_GC_MARK (tok_buf);
-  ans = scm_lreadr (&tok_buf, gdb_input_port, &ans);
+  ans = scm_read (gdb_input_port);
   if (SCM_GC_P)
     {
       if (SCM_NIMP (ans))
@@ -202,8 +198,6 @@
   if (SCM_NIMP (ans))
     scm_permanent_object (ans);
 exit:
-  if (tok_buf_mark_p)
-    SCM_SET_GC_MARK (tok_buf);
   remark_port (gdb_input_port);
   SCM_END_FOREIGN_BLOCK;
   return status;
@@ -292,8 +286,6 @@
 			SCM_OPN | SCM_RDNG | SCM_WRTNG,
 			s);
   gdb_input_port = scm_permanent_object (port);
-
-  tok_buf = scm_permanent_object (scm_c_make_string (30, SCM_UNDEFINED));
 }
 
 /*


--- orig/libguile/read.c
+++ mod/libguile/read.c
@@ -19,7 +19,17 @@
 
 
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
 #include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+
 #include "libguile/_scm.h"
 #include "libguile/chars.h"
 #include "libguile/eval.h"
@@ -36,6 +46,7 @@
 #include "libguile/vectors.h"
 #include "libguile/validate.h"
 #include "libguile/srfi-4.h"
+#include "libguile/srfi-13.h"
 
 #include "libguile/read.h"
 #include "libguile/private-options.h"
@@ -124,77 +135,114 @@
 /* An association list mapping extra hash characters to procedures.  */
 static SCM *scm_read_hash_procedures;
 
-SCM_DEFINE (scm_read, "read", 0, 1, 0, 
-            (SCM port),
-	    "Read an s-expression from the input port @var{port}, or from\n"
-	    "the current input port if @var{port} is not specified.\n"
-	    "Any whitespace before the next token is discarded.")
-#define FUNC_NAME s_scm_read
-{
-  int c;
-  SCM tok_buf, copy;
 
-  if (SCM_UNBNDP (port))
-    port = scm_current_input_port ();
-  SCM_VALIDATE_OPINPORT (1, port);
+
+/* Token readers.  */
 
-  c = scm_flush_ws (port, (char *) NULL);
-  if (EOF == c)
-    return SCM_EOF_VAL;
-  scm_ungetc (c, port);
 
-  tok_buf = scm_c_make_string (30, SCM_UNDEFINED);
-  return scm_lreadr (&tok_buf, port, &copy);
-}
-#undef FUNC_NAME
+/* Size of the C buffer used to read symbols and numbers.  */
+#define READER_BUFFER_SIZE            128
 
+/* Size of the C buffer used to read strings.  */
+#define READER_STRING_BUFFER_SIZE     512
 
+/* The maximum size of Scheme character names.  */
+#define READER_CHAR_NAME_MAX_SIZE      50
+
+
+/* `isblank' is only in C99.  */
+#define CHAR_IS_BLANK_(_chr)					\
+  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
+   || ((_chr) == '\f'))
+
+#ifdef MSDOS
+# define CHAR_IS_BLANK(_chr)			\
+  ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
+#else
+# define CHAR_IS_BLANK CHAR_IS_BLANK_
+#endif
+
+
+/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
+   structure'').  */
+#define CHAR_IS_R5RS_DELIMITER(c)				\
+  (CHAR_IS_BLANK (c)						\
+   || (c == ')') || (c == '(') || (c == ';') || (c == '"'))
+
+#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER
+
+/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
+   Structure''.  */
+#define CHAR_IS_EXPONENT_MARKER(_chr)				\
+  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')	\
+   || ((_chr) == 'd') || ((_chr) == 'l'))
+
+/* An inlinable version of `scm_c_downcase ()'.  */
+#define CHAR_DOWNCASE(_chr)				\
+  (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr))
 
-char *
-scm_grow_tok_buf (SCM *tok_buf)
-{
-  size_t oldlen = scm_i_string_length (*tok_buf);
-  const char *olddata = scm_i_string_chars (*tok_buf);
-  char *newdata;
-  SCM newstr = scm_i_make_string (2 * oldlen, &newdata);
-  size_t i;
 
-  for (i = 0; i != oldlen; ++i)
-    newdata[i] = olddata[i];
+#ifndef HAVE_STRNCASECMP
+/* XXX: Use Gnulib's `strncasecmp ()'.  */
+
+static int
+strncasecmp (const char *s1, const char *s2, size_t len2)
+{
+  while (*s1 && *s2 && len2 > 0)
+    {
+      int c1 = *s1, c2 = *s2;
 
-  *tok_buf = newstr;
-  return newdata;
+      if (CHAR_DOWNCASE (c1) != CHAR_DOWNCASE (c2))
+	return 0;
+      else
+	{
+	  ++s1;
+	  ++s2;
+	  --len2;
+	}
+    }
+  return !(*s1 || *s2 || len2 > 0);
 }
+#endif
 
-/* Consume an SCSH-style block comment.  Assume that we've already
-   read the initial `#!', and eat characters until we get a
-   exclamation-point/sharp-sign sequence. 
-*/
 
-static void
-skip_scsh_block_comment (SCM port)
+/* Helper function similar to `scm_read_token ()'.  Read from PORT until a
+   whitespace is read.  Return zero if the whole token could fit in BUF,
+   non-zero otherwise.  */
+static inline int
+read_token (SCM port, char *buf, size_t buf_size, size_t *read)
 {
-  int bang_seen = 0;
+  *read = 0;
 
-  for (;;)
+  while (*read < buf_size)
     {
-      int c = scm_getc (port);
-      
-      if (c == EOF)
-	scm_i_input_error ("skip_block_comment", port, 
-			   "unterminated `#! ... !#' comment", SCM_EOL);
+      int chr;
 
-      if (c == '!')
-	bang_seen = 1;
-      else if (c == '#' && bang_seen)
-	return;
+      chr = scm_getc (port);
+      chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
+
+      if (chr == EOF)
+	return 0;
+      else if (CHAR_IS_DELIMITER (chr))
+	{
+	  scm_ungetc (chr, port);
+	  return 0;
+	}
       else
-	bang_seen = 0;
+	{
+	  *buf = (char) chr;
+	  buf++, (*read)++;
+	}
     }
+
+  return 1;
 }
 
-int 
-scm_flush_ws (SCM port, const char *eoferr)
+
+/* Skip whitespace from PORT and return the first non-whitespace character
+   read.  Raise an error on end-of-file.  */
+static int
+flush_ws (SCM port, const char *eoferr)
 {
   register int c;
   while (1)
@@ -210,6 +258,7 @@
 			       SCM_EOL);
 	  }
 	return c;
+
       case ';':
       lp:
 	switch (c = scm_getc (port))
@@ -222,675 +271,879 @@
 	    break;
 	  }
 	break;
-      case '#':
-	switch (c = scm_getc (port))
-	  {
-	  case EOF:
-	    eoferr = "read_sharp";
-	    goto goteof;
-	  case '!':
-	    skip_scsh_block_comment (port);
-	    break;
-	  default:
-	    scm_ungetc (c, port);
-	    return '#';
-	  }
-	break;
+
       case SCM_LINE_INCREMENTORS:
       case SCM_SINGLE_SPACES:
       case '\t':
 	break;
+
       default:
 	return c;
       }
+
+  return 0;
 }
 
 
+
+/* Token readers.  */
 
-int
-scm_casei_streq (char *s1, char *s2)
-{
-  while (*s1 && *s2)
-    if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2))
-      return 0;
-    else
-      {
-	++s1;
-	++s2;
-      }
-  return !(*s1 || *s2);
-}
+static SCM scm_read_expression (SCM port);
+static SCM scm_read_sharp (int chr, SCM port);
+static SCM scm_get_hash_procedure (int c);
+static SCM recsexpr (SCM obj, long line, int column, SCM filename);
 
-static int
-scm_i_casei_streq (const char *s1, const char *s2, size_t len2)
-{
-  while (*s1 && len2 > 0)
-    if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2))
-      return 0;
-    else
-      {
-	++s1;
-	++s2;
-	--len2;
-      }
-  return !(*s1 || len2 > 0);
-}
 
-/* recsexpr is used when recording expressions
- * constructed by read:sharp.
- */
 static SCM
-recsexpr (SCM obj, long line, int column, SCM filename)
+scm_read_sexp (int chr, SCM port)
+#define FUNC_NAME "scm_i_lreadparen"
 {
-  if (!scm_is_pair(obj)) {
-    return obj;
-  } else {
-    SCM tmp = obj, copy;
-    /* If this sexpr is visible in the read:sharp source, we want to
-       keep that information, so only record non-constant cons cells
-       which haven't previously been read by the reader. */
-    if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
-      {
-	if (SCM_COPY_SOURCE_P)
-	  {
-	    copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
-			     SCM_UNDEFINED);
-	    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
-	      {
-		SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
-						      line,
-						      column,
-						      filename),
-					    SCM_UNDEFINED));
-		copy = SCM_CDR (copy);
-	      }
-	    SCM_SETCDR (copy, tmp);
-	  }
-	else
-	  {
-	    recsexpr (SCM_CAR (obj), line, column, filename);
-	    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
-	      recsexpr (SCM_CAR (tmp), line, column, filename);
-	    copy = SCM_UNDEFINED;
-	  }
-	scm_whash_insert (scm_source_whash,
-			  obj,
-			  scm_make_srcprops (line,
-					     column,
-					     filename,
-					     copy,
-					     SCM_EOL));
-      }
-    return obj;
-  }
-}
+  register int c;
+  register SCM tmp;
+  register SCM tl, ans = SCM_EOL;
+  SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;;
+  static const int terminating_char = ')';
+
+  /* Need to capture line and column numbers here. */
+  long line = SCM_LINUM (port);
+  int column = SCM_COL (port) - 1;
 
 
-static SCM scm_get_hash_procedure(int c);
-static SCM scm_i_lreadparen (SCM *, SCM, char *, SCM *, char);
+  c = flush_ws (port, FUNC_NAME);
+  if (terminating_char == c)
+    return SCM_EOL;
 
-static char s_list[]="list";
-#if SCM_ENABLE_ELISP
-static char s_vector[]="vector";
-#endif
+  scm_ungetc (c, port);
+  if (scm_is_eq (scm_sym_dot,
+		 (tmp = scm_read_expression (port))))
+    {
+      ans = scm_read_expression (port);
+      if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
+	scm_i_input_error (FUNC_NAME, port, "missing close paren",
+			   SCM_EOL);
+      return ans;
+    }
 
-SCM 
-scm_lreadr (SCM *tok_buf, SCM port, SCM *copy)
-#define FUNC_NAME "scm_lreadr"
-{
-  int c;
-  size_t j;
-  SCM p;
-				  
- tryagain:
-  c = scm_flush_ws (port, s_scm_read);
-  switch (c)
+  /* Build the head of the list structure. */
+  ans = tl = scm_cons (tmp, SCM_EOL);
+
+  if (SCM_COPY_SOURCE_P)
+    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
+			   ? copy
+			   : tmp,
+			   SCM_EOL);
+
+  while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
     {
-    case EOF:
-      return SCM_EOF_VAL;
+      SCM new_tail;
 
-    case '(':
-      return SCM_RECORD_POSITIONS_P
-	? scm_lreadrecparen (tok_buf, port, s_list, copy)
-	: scm_i_lreadparen (tok_buf, port, s_list, copy, ')');
-    case ')':
-      scm_i_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL);
-      goto tryagain;
-    
-#if SCM_ENABLE_ELISP
-    case '[':
-      if (SCM_ELISP_VECTORS_P)
-	{
-	  p = scm_i_lreadparen (tok_buf, port, s_vector, copy, ']');
-	  return scm_is_null (p) ? scm_nullvect : scm_vector (p);
-	}
-      goto read_token;
-#endif
-    case '\'':
-      p = scm_sym_quote;
-      goto recquote;
-    case '`':
-      p = scm_sym_quasiquote;
-      goto recquote;
-    case ',':
-      c = scm_getc (port);
-      if ('@' == c)
-	p = scm_sym_uq_splicing;
-      else
+      scm_ungetc (c, port);
+      if (scm_is_eq (scm_sym_dot,
+		     (tmp = scm_read_expression (port))))
 	{
-	  scm_ungetc (c, port);
-	  p = scm_sym_unquote;
+	  SCM_SETCDR (tl, tmp = scm_read_expression (port));
+
+	  if (SCM_COPY_SOURCE_P)
+	    SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
+				       SCM_EOL));
+
+	  c = flush_ws (port, FUNC_NAME);
+	  if (terminating_char != c)
+	    scm_i_input_error (FUNC_NAME, port,
+			       "in pair: missing close paren", SCM_EOL);
+	  goto exit;
 	}
-    recquote:
-      p = scm_cons2 (p,
-		     scm_lreadr (tok_buf, port, copy),
-		     SCM_EOL);
-      if (SCM_RECORD_POSITIONS_P)
-	scm_whash_insert (scm_source_whash,
-			  p,
-			  scm_make_srcprops (SCM_LINUM (port),
-					     SCM_COL (port) - 1,
-					     SCM_FILENAME (port),
-					     SCM_COPY_SOURCE_P
-					     ? (*copy = scm_cons2 (SCM_CAR (p),
-								   SCM_CAR (SCM_CDR (p)),
-								   SCM_EOL))
-					     : SCM_UNDEFINED,
-					     SCM_EOL));
-      return p;
-    case '#':
-      c = scm_getc (port);
 
-      {
-	/* Check for user-defined hash procedure first, to allow
-	   overriding of builtin hash read syntaxes.  */
-	SCM sharp = scm_get_hash_procedure (c);
-	if (scm_is_true (sharp))
-	  {
-	    long line = SCM_LINUM (port);
-	    int column = SCM_COL (port) - 2;
-	    SCM got;
-
-	    got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
-	    if (scm_is_eq (got, SCM_UNSPECIFIED))
-	      goto handle_sharp;
-	    if (SCM_RECORD_POSITIONS_P)
-	      return *copy = recsexpr (got, line, column,
-				       SCM_FILENAME (port));
-	    else
-	      return got;
-	  }
-      }
-    handle_sharp:
-      switch (c)
+      new_tail = scm_cons (tmp, SCM_EOL);
+      SCM_SETCDR (tl, new_tail);
+      tl = new_tail;
+
+      if (SCM_COPY_SOURCE_P)
 	{
-	  /* Vector, arrays, both uniform and not are handled by this
-	     one function.  It also disambiguates between '#f' and
-	     '#f32' and '#f64'.
-	  */
-	case '0': case '1': case '2': case '3': case '4':
-	case '5': case '6': case '7': case '8': case '9':
-	case 'u': case 's': case 'f':
-	case '@':
-	case '(':
-#if SCM_ENABLE_DEPRECATED
-	  /* See below for 'i' and 'e'. */
-	case 'a':
-	case 'c':
-	case 'y':
-	case 'h':
-	case 'l':
-#endif
-	  return scm_i_read_array (port, c);
+	  SCM new_tail2 = scm_cons (scm_is_pair (tmp)
+				    ? copy
+				    : tmp, SCM_EOL);
+	  SCM_SETCDR (tl2, new_tail2);
+	  tl2 = new_tail2;
+	}
+    }
 
-	case 't':
-	case 'T':
-	  return SCM_BOOL_T;
+ exit:
+  if (SCM_RECORD_POSITIONS_P)
+    scm_whash_insert (scm_source_whash,
+		      ans,
+		      scm_make_srcprops (line, column,
+					 SCM_FILENAME (port),
+					 SCM_COPY_SOURCE_P
+					 ? ans2
+					 : SCM_UNDEFINED,
+					 SCM_EOL));
+  return ans;
+}
+#undef FUNC_NAME
 
-	case 'F':
-	  /* See above for lower case 'f'. */
-	  return SCM_BOOL_F;
+static SCM
+scm_read_string (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+  /* For strings smaller than C_STR, this function creates only one Scheme
+     object (the string returned).  */
 
+  SCM str = SCM_BOOL_F;
+  char c_str[READER_STRING_BUFFER_SIZE];
+  unsigned c_str_len = 0;
+  int c;
 
-	case 'i':
-	case 'e':
-#if SCM_ENABLE_DEPRECATED
-	  {
-	    /* When next char is '(', it really is an old-style
-	       uniform array. */
-	    int next_c = scm_getc (port);
-	    if (next_c != EOF)
-	      scm_ungetc (next_c, port);
-	    if (next_c == '(')
-	      return scm_i_read_array (port, c);
-	    /* Fall through. */
-	  }
-#endif  
-	case 'b':
-	case 'B':
-	case 'o':
-	case 'O':
-	case 'd':
-	case 'D':
-	case 'x':
-	case 'X':
-	case 'I':
-	case 'E':
-	  scm_ungetc (c, port);
-	  c = '#';
-	  goto num;
-
-	case '!':
-	  /* should never happen, #!...!# block comments are skipped
-	     over in scm_flush_ws. */
-	  abort ();
-
-	case '*':
-	  j = scm_read_token (c, tok_buf, port, 0);
-	  p = scm_istr2bve (scm_c_substring_shared (*tok_buf, 1, j));
-	  if (scm_is_true (p))
-	    return p;
-	  else
-	    goto unkshrp;
+  while ('"' != (c = scm_getc (port)))
+    {
+      if (c == EOF)
+	str_eof: scm_i_input_error (FUNC_NAME, port,
+				    "end of file in string constant",
+				    SCM_EOL);
 
-	case '{':
-	  j = scm_read_token (c, tok_buf, port, 1);
-	  return scm_string_to_symbol (scm_c_substring_copy (*tok_buf, 0, j));
-
-	case '\\':
-	  c = scm_getc (port);
-	  j = scm_read_token (c, tok_buf, port, 0);
-	  if (j == 1)
-	    return SCM_MAKE_CHAR (c);
-	  if (c >= '0' && c < '8')
-	    {
-	      /* Dirk:FIXME::  This type of character syntax is not R5RS
-	       * compliant.  Further, it should be verified that the constant
-	       * does only consist of octal digits.  Finally, it should be
-	       * checked whether the resulting fixnum is in the range of
-	       * characters.  */
-	      p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf),
-						  j, 8);
-	      if (SCM_I_INUMP (p))
-		return SCM_MAKE_CHAR (SCM_I_INUM (p));
-	    }
-	  for (c = 0; c < scm_n_charnames; c++)
-	    if (scm_charnames[c]
-		&& (scm_i_casei_streq (scm_charnames[c],
-				       scm_i_string_chars (*tok_buf), j)))
-	      return SCM_MAKE_CHAR (scm_charnums[c]);
-	  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
-			     scm_list_1 (scm_c_substring (*tok_buf, 0, j)));
+      if (c_str_len + 1 >= sizeof (c_str))
+	{
+	  /* Flush the C buffer onto a Scheme string.  */
+	  SCM addy;
 
-	  /* #:SYMBOL is a syntax for keywords supported in all contexts.  */
-	case ':':
-	  return scm_symbol_to_keyword (scm_read (port));
+	  if (str == SCM_BOOL_F)
+	    str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
 
-	default:
-	callshrp:
-	  {
-	    SCM sharp = scm_get_hash_procedure (c);
+	  addy = scm_from_locale_stringn (c_str, c_str_len);
+	  str = scm_string_append_shared (scm_list_2 (str, addy));
 
-	    if (scm_is_true (sharp))
-	      {
-		long line = SCM_LINUM (port);
-		int column = SCM_COL (port) - 2;
-		SCM got;
-
-		got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port);
-		if (scm_is_eq (got, SCM_UNSPECIFIED))
-		  goto unkshrp;
-		if (SCM_RECORD_POSITIONS_P)
-		  return *copy = recsexpr (got, line, column,
-					   SCM_FILENAME (port));
-		else
-		  return got;
-	      }
-	  }
-	unkshrp:
-	scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
-			   scm_list_1 (SCM_MAKE_CHAR (c)));
+	  c_str_len = 0;
 	}
 
-    case '"':
-      j = 0;
-      while ('"' != (c = scm_getc (port)))
-	{
-	  if (c == EOF)
-	    str_eof: scm_i_input_error (FUNC_NAME, port,
-					"end of file in string constant", 
-					SCM_EOL);
-
-	  while (j + 2 >= scm_i_string_length (*tok_buf))
-	    scm_grow_tok_buf (tok_buf);
-
-	  if (c == '\\')
-	    switch (c = scm_getc (port))
-	      {
-	      case EOF:
-		goto str_eof;
-	      case '"':
-	      case '\\':
-		break;
+      if (c == '\\')
+	switch (c = scm_getc (port))
+	  {
+	  case EOF:
+	    goto str_eof;
+	  case '"':
+	  case '\\':
+	    break;
 #if SCM_ENABLE_ELISP
-	      case '(':
-	      case ')':
-		if (SCM_ESCAPED_PARENS_P)
-		  break;
-		goto bad_escaped;
+	  case '(':
+	  case ')':
+	    if (SCM_ESCAPED_PARENS_P)
+	      break;
+	    goto bad_escaped;
 #endif
-	      case '\n':
-		continue;
-	      case '0':
-		c = '\0';
-		break;
-	      case 'f':
-		c = '\f';
-		break;
-	      case 'n':
-		c = '\n';
-		break;
-	      case 'r':
-		c = '\r';
-		break;
-	      case 't':
-		c = '\t';
-		break;
-	      case 'a':
-		c = '\007';
-		break;
-	      case 'v':
-		c = '\v';
-		break;
-	      case 'x':
-		{
-		  int a, b;
-		  a = scm_getc (port);
-		  if (a == EOF) goto str_eof;
-		  b = scm_getc (port);
-		  if (b == EOF) goto str_eof;
-		  if      ('0' <= a && a <= '9') a -= '0';
-		  else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
-		  else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
-		  else goto bad_escaped;
-		  if      ('0' <= b && b <= '9') b -= '0';
-		  else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
-		  else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
-		  else goto bad_escaped;
-		  c = a * 16 + b;
-		  break;
-		}
-	      default:
-	      bad_escaped:
-		scm_i_input_error(FUNC_NAME, port,
-				  "illegal character in escape sequence: ~S",
-				  scm_list_1 (SCM_MAKE_CHAR (c)));
-	      }
-	  scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
-	  ++j;
-	}
-      if (j == 0)
-	return scm_nullstr;
-
-      /* Change this to scm_c_substring_read_only when
-	 SCM_STRING_CHARS has been removed.
-      */
-      return scm_c_substring_copy (*tok_buf, 0, j);
-
-    case '0': case '1': case '2': case '3': case '4':
-    case '5': case '6': case '7': case '8': case '9':
-    case '.':
-    case '-':
-    case '+':
-    num:
-      j = scm_read_token (c, tok_buf, port, 0);
-      if (j == 1 && (c == '+' || c == '-'))
-	/* Shortcut:  Detected symbol '+ or '- */
-	goto tok;
-
-      p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf), j, 10);
-      if (scm_is_true (p))
-	return p;
-      if (c == '#')
-	{
-	  if ((j == 2) && (scm_getc (port) == '('))
+	  case '\n':
+	    continue;
+	  case '0':
+	    c = '\0';
+	    break;
+	  case 'f':
+	    c = '\f';
+	    break;
+	  case 'n':
+	    c = '\n';
+	    break;
+	  case 'r':
+	    c = '\r';
+	    break;
+	  case 't':
+	    c = '\t';
+	    break;
+	  case 'a':
+	    c = '\007';
+	    break;
+	  case 'v':
+	    c = '\v';
+	    break;
+	  case 'x':
 	    {
-	      scm_ungetc ('(', port);
-	      c = scm_i_string_chars (*tok_buf)[1];
-	      goto callshrp;
+	      int a, b;
+	      a = scm_getc (port);
+	      if (a == EOF) goto str_eof;
+	      b = scm_getc (port);
+	      if (b == EOF) goto str_eof;
+	      if      ('0' <= a && a <= '9') a -= '0';
+	      else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
+	      else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
+	      else goto bad_escaped;
+	      if      ('0' <= b && b <= '9') b -= '0';
+	      else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
+	      else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
+	      else goto bad_escaped;
+	      c = a * 16 + b;
+	      break;
 	    }
-	  scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
-	}
-      goto tok;
-
-    case ':':
-      if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
-	return scm_symbol_to_keyword (scm_read (port));
+	  default:
+	  bad_escaped:
+	    scm_i_input_error (FUNC_NAME, port,
+			       "illegal character in escape sequence: ~S",
+			       scm_list_1 (SCM_MAKE_CHAR (c)));
+	  }
+      c_str[c_str_len++] = c;
+    }
 
-      /* fallthrough */
-    default:
-#if SCM_ENABLE_ELISP
-    read_token:
-#endif
-      j = scm_read_token (c, tok_buf, port, 0);
-      /* fallthrough */
+  if (c_str_len > 0)
+    {
+      SCM addy;
 
-    tok:
-      return scm_string_to_symbol (scm_c_substring (*tok_buf, 0, j));
+      addy = scm_from_locale_stringn (c_str, c_str_len);
+      if (str == SCM_BOOL_F)
+	str = addy;
+      else
+	str = scm_string_append_shared (scm_list_2 (str, addy));
     }
+  else
+    str = (str == SCM_BOOL_F) ? scm_nullstr : str;
+
+  return str;
 }
 #undef FUNC_NAME
 
 
-#ifdef _UNICOS
-_Pragma ("noopt");		/* # pragma _CRI noopt */
-#endif
-
-size_t 
-scm_read_token (int ic, SCM *tok_buf, SCM port, int weird)
+static SCM
+scm_read_number (int chr, SCM port)
 {
-  size_t j;
-  int c;
+  SCM result, str = SCM_EOL;
+  char buffer[READER_BUFFER_SIZE];
+  size_t read;
+  int overflow = 0;
 
-  c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(ic) : ic);
-					    
-  if (weird)
-    j = 0;
-  else
+  scm_ungetc (chr, port);
+  do
     {
-      j = 0;
-      while (j + 2 >= scm_i_string_length (*tok_buf))
-	scm_grow_tok_buf (tok_buf);
-      scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
-      ++j;
+      overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+      if ((overflow) || (scm_is_pair (str)))
+	str = scm_cons (scm_from_locale_stringn (buffer, read), str);
     }
+  while (overflow);
 
-  while (1)
+  if (scm_is_pair (str))
     {
-      while (j + 2 >= scm_i_string_length (*tok_buf))
-	scm_grow_tok_buf (tok_buf);
-      c = scm_getc (port);
-      switch (c)
-	{
-	case '(':
-	case ')':
-#if SCM_ENABLE_ELISP
-	case '[':
-	case ']':
-#endif
-	case '"':
-	case ';':
-	case SCM_WHITE_SPACES:
-	case SCM_LINE_INCREMENTORS:
-	  if (weird
-#if SCM_ENABLE_ELISP
-	      || ((!SCM_ELISP_VECTORS_P) && ((c == '[') || (c == ']')))
-#endif
-	      )
-	    goto default_case;
+      /* The slow path.  */
 
-	  scm_ungetc (c, port);
-	case EOF:
-	eof_case:
-	  return j;
-	case '\\':
-	  if (!weird)
-	    goto default_case;
-	  else
-	    {
-	      c = scm_getc (port);
-	      if (c == EOF)
-		goto eof_case;
-	      else
-		goto default_case;
-	    }
-	case '}':
-	  if (!weird)
-	    goto default_case;
+      str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+      result = scm_string_to_number (str, SCM_UNDEFINED);
+      if (!scm_is_true (result))
+	/* Return a symbol instead of a number.  */
+	result = scm_string_to_symbol (str);
+    }
+  else
+    {
+      result = scm_c_locale_stringn_to_number (buffer, read, 10);
+      if (!scm_is_true (result))
+	/* Return a symbol instead of a number.  */
+	result = scm_from_locale_symboln (buffer, read);
+    }
 
-	  c = scm_getc (port);
-	  if (c == '#')
-	    {
-	      return j;
-	    }
-	  else
-	    {
-	      scm_ungetc (c, port);
-	      c = '}';
-	      goto default_case;
-	    }
+  return result;
+}
 
-	default:
-	default_case:
+static SCM
+scm_read_mixed_case_symbol (int chr, SCM port)
+{
+  SCM result, str = SCM_EOL;
+  int overflow = 0;
+  char buffer[READER_BUFFER_SIZE];
+  size_t read = 0;
+
+  scm_ungetc (chr, port);
+  do
+    {
+      overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+      if ((overflow) || (scm_is_pair (str)))
+	str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+    }
+  while (overflow);
+
+  if (scm_is_pair (str))
+    {
+      str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+      result = scm_string_to_symbol (str);
+    }
+  else
+    /* For symbols smaller than `sizeof (buffer)', we don't need to recur to
+       Scheme strings.  Therefore, we only create one Scheme object (a
+       symbol) per symbol read.  */
+    result = scm_from_locale_symboln (buffer, read);
+
+  return result;
+}
+
+static SCM
+scm_read_number_and_radix (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+  SCM result, str = SCM_EOL;
+  size_t read;
+  char buffer[READER_BUFFER_SIZE];
+  unsigned int radix;
+  int overflow = 0;
+
+  switch (chr)
+    {
+    case 'B':
+    case 'b':
+      radix = 2;
+      break;
+
+    case 'o':
+    case 'O':
+      radix = 8;
+      break;
+
+    case 'd':
+    case 'D':
+      radix = 10;
+      break;
+
+    case 'x':
+    case 'X':
+      radix = 16;
+      break;
+
+    default:
+      scm_ungetc (chr, port);
+      scm_ungetc ('#', port);
+      radix = 10;
+    }
+
+  do
+    {
+      overflow = read_token (port, buffer, sizeof (buffer), &read);
+
+      if ((overflow) || (scm_is_pair (str)))
+	str = scm_cons (scm_from_locale_stringn (buffer, read), str);
+    }
+  while (overflow);
+
+  if (scm_is_pair (str))
+    {
+      str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
+      result = scm_string_to_number (str, scm_from_uint (radix));
+    }
+  else
+    result = scm_c_locale_stringn_to_number (buffer, read, radix);
+
+  if (scm_is_true (result))
+    return result;
+
+  scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
+
+  return SCM_BOOL_F;
+}
+#undef FUNC_NAME
+
+static SCM
+scm_read_quote (int chr, SCM port)
+{
+  SCM p;
+
+  switch (chr)
+    {
+    case '`':
+      p = scm_sym_quasiquote;
+      break;
+
+    case '\'':
+      p = scm_sym_quote;
+      break;
+
+    case ',':
+      {
+	int c;
+
+	c = scm_getc (port);
+	if ('@' == c)
+	  p = scm_sym_uq_splicing;
+	else
 	  {
-	    c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(c) : c);
-            scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c));
-	    ++j;
+	    scm_ungetc (c, port);
+	    p = scm_sym_unquote;
 	  }
+	break;
+      }
 
-	}
+    default:
+      fprintf (stderr, "%s: unhandled quote character (%i)\n",
+	       __FUNCTION__, chr);
+      abort ();
     }
-}
 
-#ifdef _UNICOS
-_Pragma ("opt");		/* # pragma _CRI opt */
-#endif
+  p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
 
-static SCM 
-scm_i_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy, char term_char)
-#define FUNC_NAME "scm_i_lreadparen"
+  return p;
+}
+
+static inline SCM
+scm_read_semicolon_comment (int chr, SCM port)
 {
-  SCM tmp;
-  SCM tl;
-  SCM ans;
   int c;
 
-  c = scm_flush_ws (port, name);
-  if (term_char == c)
-    return SCM_EOL;
-  scm_ungetc (c, port);
-  if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
+  for (c = scm_getc (port);
+       (c != EOF) && (c != '\n');
+       c = scm_getc (port));
+
+  return SCM_UNSPECIFIED;
+}
+
+
+/* Sharp readers, i.e. readers called after a `#' sign has been read.  */
+
+static SCM
+scm_read_boolean (int chr, SCM port)
+{
+  switch (chr)
     {
-      ans = scm_lreadr (tok_buf, port, copy);
-    closeit:
-      if (term_char != (c = scm_flush_ws (port, name)))
-	scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
-      return ans;
+    case 't':
+    case 'T':
+      return SCM_BOOL_T;
+
+    case 'f':
+    case 'F':
+      return SCM_BOOL_F;
     }
-  ans = tl = scm_cons (tmp, SCM_EOL);
-  while (term_char != (c = scm_flush_ws (port, name)))
+
+  return SCM_UNSPECIFIED;
+}
+
+static SCM
+scm_read_character (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+  unsigned c;
+  char charname[READER_CHAR_NAME_MAX_SIZE];
+  size_t charname_len;
+
+  if (read_token (port, charname, sizeof (charname), &charname_len))
+    goto char_error;
+
+  if (charname_len == 0)
     {
-      scm_ungetc (c, port);
-      if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
-	{
-	  SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy));
-	  goto closeit;
-	}
-      SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL));
-      tl = SCM_CDR (tl);
+      chr = scm_getc (port);
+      if (chr == EOF)
+	scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
+			   "while reading character", SCM_EOL);
+
+      /* CHR must be a token delimiter, like a whitespace.  */
+      return (SCM_MAKE_CHAR (chr));
     }
-  return ans;
+
+  if (charname_len == 1)
+    return SCM_MAKE_CHAR (charname[0]);
+
+  if (*charname >= '0' && *charname < '8')
+    {
+      /* Dirk:FIXME::  This type of character syntax is not R5RS
+       * compliant.  Further, it should be verified that the constant
+       * does only consist of octal digits.  Finally, it should be
+       * checked whether the resulting fixnum is in the range of
+       * characters.  */
+      SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
+      if (SCM_I_INUMP (p))
+	return SCM_MAKE_CHAR (SCM_I_INUM (p));
+    }
+
+  for (c = 0; c < scm_n_charnames; c++)
+    if (scm_charnames[c]
+	&& (!strncasecmp (scm_charnames[c], charname, charname_len)))
+      return SCM_MAKE_CHAR (scm_charnums[c]);
+
+ char_error:
+  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
+		     scm_list_1 (scm_from_locale_stringn (charname,
+							  charname_len)));
+
+  return SCM_UNSPECIFIED;
 }
 #undef FUNC_NAME
 
+static inline SCM
+scm_read_keyword (int chr, SCM port)
+{
+  SCM symbol;
+
+  /* Read the symbol that comprises the keyword.  Doing this instead of
+     invoking a specific symbol reader function allows `scm_read_keyword ()'
+     to adapt to the delimiters currently valid of symbols.
+
+     XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
+  symbol = scm_read_expression (port);
+  if (!scm_is_symbol (symbol))
+    scm_i_input_error (__FUNCTION__, port,
+		       "keyword prefix `~a' not followed by a symbol: ~s",
+		       scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
 
-SCM 
-scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy)
-#define FUNC_NAME "scm_lreadrecparen"
+  return (scm_symbol_to_keyword (symbol));
+}
+
+static inline SCM
+scm_read_vector (int chr, SCM port)
 {
-  register int c;
-  register SCM tmp;
-  register SCM tl, tl2 = SCM_EOL;
-  SCM ans, ans2 = SCM_EOL;
-  /* Need to capture line and column numbers here. */
-  long line = SCM_LINUM (port);
-  int column = SCM_COL (port) - 1;
+  /* Note: We call `scm_read_sexp ()' rather than READER here in order to
+     guarantee that it's going to do what we want.  After all, this is an
+     implementation detail of `scm_read_vector ()', not a desirable
+     property.  */
+  return (scm_vector (scm_read_sexp (chr, port)));
+}
 
-  c = scm_flush_ws (port, name);
-  if (')' == c)
-    return SCM_EOL;
-  scm_ungetc (c, port);
-  if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
+static inline SCM
+scm_read_srfi4_vector (int chr, SCM port)
+{
+  return scm_i_read_array (port, chr);
+}
+
+static SCM
+scm_read_guile_bit_vector (int chr, SCM port)
+{
+  /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
+     terribly inefficient but who cares?  */
+  SCM s_bits = SCM_EOL;
+
+  for (chr = scm_getc (port);
+       (chr != EOF) && ((chr == '0') || (chr == '1'));
+       chr = scm_getc (port))
     {
-      ans = scm_lreadr (tok_buf, port, copy);
-      if (')' != (c = scm_flush_ws (port, name)))
-	scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL);
-      return ans;
+      s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
     }
-  /* Build the head of the list structure. */
-  ans = tl = scm_cons (tmp, SCM_EOL);
-  if (SCM_COPY_SOURCE_P)
-    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
-			   ? *copy
-			   : tmp,
-			   SCM_EOL);
-  while (')' != (c = scm_flush_ws (port, name)))
+
+  if (chr != EOF)
+    scm_ungetc (chr, port);
+
+  return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
+}
+
+static inline SCM
+scm_read_scsh_block_comment (int chr, SCM port)
+{
+  int bang_seen = 0;
+
+  for (;;)
     {
-      SCM new_tail;
+      int c = scm_getc (port);
 
-      scm_ungetc (c, port);
-      if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy))))
+      if (c == EOF)
+	scm_i_input_error ("skip_block_comment", port,
+			   "unterminated `#! ... !#' comment", SCM_EOL);
+
+      if (c == '!')
+	bang_seen = 1;
+      else if (c == '#' && bang_seen)
+	break;
+      else
+	bang_seen = 0;
+    }
+
+  return SCM_UNSPECIFIED;
+}
+
+static SCM
+scm_read_extended_symbol (int chr, SCM port)
+{
+  /* Guile's extended symbol read syntax looks like this:
+
+       #{This is all a symbol name}#
+
+     So here, CHR is expected to be `{'.  */
+  SCM result;
+  int saw_brace = 0, finished = 0;
+  size_t len = 0;
+  char buf[1024];
+
+  result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
+
+  while ((chr = scm_getc (port)) != EOF)
+    {
+      if (saw_brace)
 	{
-	  SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy));
-	  if (SCM_COPY_SOURCE_P)
-	    SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp)
-				       ? *copy
-				       : tmp,
-				       SCM_EOL));
-	  if (')' != (c = scm_flush_ws (port, name)))
-	    scm_i_input_error (FUNC_NAME, port,
-			       "missing close paren", SCM_EOL);
-	  goto exit;
+	  if (chr == '#')
+	    {
+	      finished = 1;
+	      break;
+	    }
+	  else
+	    {
+	      saw_brace = 0;
+	      buf[len++] = '}';
+	      buf[len++] = chr;
+	    }
 	}
+      else if (chr == '}')
+	saw_brace = 1;
+      else
+	buf[len++] = chr;
 
-      new_tail = scm_cons (tmp, SCM_EOL);
-      SCM_SETCDR (tl, new_tail);
-      tl = new_tail;
+      if (len >= sizeof (buf) - 2)
+	{
+	  scm_string_append (scm_list_2 (result,
+					 scm_from_locale_stringn (buf, len)));
+	  len = 0;
+	}
 
-      if (SCM_COPY_SOURCE_P)
+      if (finished)
+	break;
+    }
+
+  if (len)
+    result = scm_string_append (scm_list_2
+				(result,
+				 scm_from_locale_stringn (buf, len)));
+
+  return (scm_string_to_symbol (result));
+}
+
+
+
+/* Top-level token readers, i.e., dispatchers.  */
+
+static SCM
+scm_read_sharp_extension (int chr, SCM port)
+{
+  SCM proc;
+
+  proc = scm_get_hash_procedure (chr);
+  if (scm_is_true (scm_procedure_p (proc)))
+    {
+      long line = SCM_LINUM (port);
+      int column = SCM_COL (port) - 2;
+      SCM got;
+
+      got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
+      if (!scm_is_eq (got, SCM_UNSPECIFIED))
 	{
-	  SCM new_tail2 = scm_cons (scm_is_pair (tmp) ? *copy : tmp, SCM_EOL);
-	  SCM_SETCDR (tl2, new_tail2);
-	  tl2 = new_tail2;
+	  if (SCM_RECORD_POSITIONS_P)
+	    return (recsexpr (got, line, column,
+			      SCM_FILENAME (port)));
+	  else
+	    return got;
 	}
     }
-exit:
-  scm_whash_insert (scm_source_whash,
-		    ans,
-		    scm_make_srcprops (line,
-				       column,
-				       SCM_FILENAME (port),
-				       SCM_COPY_SOURCE_P
-				       ? *copy = ans2
-				       : SCM_UNDEFINED,
-				       SCM_EOL));
-  return ans;
+
+  return SCM_UNSPECIFIED;
+}
+
+/* The reader for the sharp `#' character.  It basically dispatches reads
+   among the above token readers.   */
+static SCM
+scm_read_sharp (int chr, SCM port)
+#define FUNC_NAME "scm_lreadr"
+{
+  SCM result;
+
+  chr = scm_getc (port);
+
+  result = scm_read_sharp_extension (chr, port);
+  if (!scm_is_eq (result, SCM_UNSPECIFIED))
+    return result;
+
+  switch (chr)
+    {
+    case '\\':
+      return (scm_read_character (chr, port));
+    case '(':
+      return (scm_read_vector (chr, port));
+    case 's':
+    case 'u':
+    case 'f':
+      /* This one may return either a boolean or an SRFI-4 vector.  */
+      return (scm_read_srfi4_vector (chr, port));
+    case '*':
+      return (scm_read_guile_bit_vector (chr, port));
+    case 't':
+    case 'T':
+    case 'F':
+      /* This one may return either a boolean or an SRFI-4 vector.  */
+      return (scm_read_boolean (chr, port));
+    case ':':
+      return (scm_read_keyword (chr, port));
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+    case '@':
+#if SCM_ENABLE_DEPRECATED
+      /* See below for 'i' and 'e'. */
+    case 'a':
+    case 'c':
+    case 'y':
+    case 'h':
+    case 'l':
+#endif
+      return (scm_i_read_array (port, chr));
+
+    case 'i':
+    case 'e':
+#if SCM_ENABLE_DEPRECATED
+      {
+	/* When next char is '(', it really is an old-style
+	   uniform array. */
+	int next_c = scm_getc (port);
+	if (next_c != EOF)
+	  scm_ungetc (next_c, port);
+	if (next_c == '(')
+	  return scm_i_read_array (port, chr);
+	/* Fall through. */
+      }
+#endif
+    case 'b':
+    case 'B':
+    case 'o':
+    case 'O':
+    case 'd':
+    case 'D':
+    case 'x':
+    case 'X':
+    case 'I':
+    case 'E':
+      return (scm_read_number_and_radix (chr, port));
+    case '{':
+      return (scm_read_extended_symbol (chr, port));
+    case '!':
+      return (scm_read_scsh_block_comment (chr, port));
+    default:
+      result = scm_read_sharp_extension (chr, port);
+      if (scm_is_eq (result, SCM_UNSPECIFIED))
+	scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
+			   scm_list_1 (SCM_MAKE_CHAR (chr)));
+      else
+	return result;
+    }
+
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+static SCM
+scm_read_expression (SCM port)
+#define FUNC_NAME "scm_read_expression"
+{
+  while (1)
+    {
+      register int chr;
+
+      chr = scm_getc (port);
+
+      switch (chr)
+	{
+	case SCM_WHITE_SPACES:
+	case SCM_LINE_INCREMENTORS:
+	  break;
+	case ';':
+	  (void) scm_read_semicolon_comment (chr, port);
+	  break;
+	case '(':
+	  return (scm_read_sexp (chr, port));
+	case '"':
+	  return (scm_read_string (chr, port));
+	case '\'':
+	case '`':
+	case ',':
+	  return (scm_read_quote (chr, port));
+	case '#':
+	  {
+	    SCM result;
+	    result = scm_read_sharp (chr, port);
+	    if (scm_is_eq (result, SCM_UNSPECIFIED))
+	      /* We read a comment or some such.  */
+	      break;
+	    else
+	      return result;
+	  }
+	case ')':
+	  scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
+	  break;
+	case EOF:
+	  return SCM_EOF_VAL;
+	case ':':
+	  if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
+	    return scm_symbol_to_keyword (scm_read_expression (port));
+	  /* Fall through.  */
+
+	default:
+	  {
+	    if (((chr >= '0') && (chr <= '9'))
+		|| (strchr ("+-.", chr)))
+	      return (scm_read_number (chr, port));
+	    else
+	      return (scm_read_mixed_case_symbol (chr, port));
+	  }
+	}
+    }
+}
+#undef FUNC_NAME
+
+
+/* Actual reader.  */
+
+SCM_DEFINE (scm_read, "read", 0, 1, 0, 
+            (SCM port),
+	    "Read an s-expression from the input port @var{port}, or from\n"
+	    "the current input port if @var{port} is not specified.\n"
+	    "Any whitespace before the next token is discarded.")
+#define FUNC_NAME s_scm_read
+{
+  int c;
+
+  if (SCM_UNBNDP (port))
+    port = scm_current_input_port ();
+  SCM_VALIDATE_OPINPORT (1, port);
+
+  c = flush_ws (port, (char *) NULL);
+  if (EOF == c)
+    return SCM_EOF_VAL;
+  scm_ungetc (c, port);
+
+  return (scm_read_expression (port));
 }
 #undef FUNC_NAME
 
 
+/* Used when recording expressions constructed by `scm_read_sharp ()'.  */
+static SCM
+recsexpr (SCM obj, long line, int column, SCM filename)
+{
+  if (!scm_is_pair(obj)) {
+    return obj;
+  } else {
+    SCM tmp = obj, copy;
+    /* If this sexpr is visible in the read:sharp source, we want to
+       keep that information, so only record non-constant cons cells
+       which haven't previously been read by the reader. */
+    if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
+      {
+	if (SCM_COPY_SOURCE_P)
+	  {
+	    copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
+			     SCM_UNDEFINED);
+	    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
+	      {
+		SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp),
+						      line,
+						      column,
+						      filename),
+					    SCM_UNDEFINED));
+		copy = SCM_CDR (copy);
+	      }
+	    SCM_SETCDR (copy, tmp);
+	  }
+	else
+	  {
+	    recsexpr (SCM_CAR (obj), line, column, filename);
+	    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
+	      recsexpr (SCM_CAR (tmp), line, column, filename);
+	    copy = SCM_UNDEFINED;
+	  }
+	scm_whash_insert (scm_source_whash,
+			  obj,
+			  scm_make_srcprops (line,
+					     column,
+					     filename,
+					     copy,
+					     SCM_EOL));
+      }
+    return obj;
+  }
+}
+
 /* Manipulate the read-hash-procedures alist.  This could be written in
    Scheme, but maybe it will also be used by C code during initialisation.  */
 SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,


--- orig/libguile/read.h
+++ mod/libguile/read.h
@@ -53,16 +53,12 @@
 
 SCM_API SCM scm_read_options (SCM setting);
 SCM_API SCM scm_read (SCM port);
-SCM_API char * scm_grow_tok_buf (SCM * tok_buf);
-SCM_API int scm_flush_ws (SCM port, const char *eoferr);
-SCM_API int scm_casei_streq (char * s1, char * s2);
-SCM_API SCM scm_lreadr (SCM * tok_buf, SCM port, SCM *copy);
 SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird);
-SCM_API SCM scm_lreadrecparen (SCM * tok_buf, SCM port, char *name, SCM *copy);
 SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc);
 
 SCM_API void scm_i_input_error (const char *func, SCM port,
-				const char *message, SCM arg);
+				const char *message, SCM arg)
+  SCM_NORETURN;
 
 SCM_API void scm_init_read (void);
 

--- orig/test-suite/ChangeLog
+++ mod/test-suite/ChangeLog
@@ -1,3 +1,14 @@
+2007-07-22  Ludovic Courtès  <ludo@gnu.org>
+
+	* tests/reader.test: Added a proper header and `define-module'.
+	(exception:unterminated-block-comment,
+	exception:unknown-character-name,
+	exception:unknown-sharp-object, exception:eof-in-string,
+	exception:illegal-escape, with-read-options): New.
+	(reading)[block comment, unprintable symbol]: New tests.
+	(exceptions): New test prefix.
+	(read-options): New test prefix.
+
 2007-07-18  Stephen Compall  <s11@member.fsf.org>
 
 	* tests/syntax.test: Add SRFI-61 `cond' tests.


--- orig/test-suite/tests/reader.test
+++ mod/test-suite/tests/reader.test
@@ -1,15 +1,55 @@
-;;;; reader.test --- test the Guile parser -*- scheme -*-
-;;;; Jim Blandy <jimb@red-bean.com> --- September 1999
+;;;; reader.test --- Exercise the reader.               -*- Scheme -*-
+;;;;
+;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
+;;;; Jim Blandy <jimb@red-bean.com>
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 2.1 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (test-suite reader)
+  :use-module (test-suite lib))
+
 
 (define exception:eof
   (cons 'read-error "end of file$"))
-
 (define exception:unexpected-rparen
   (cons 'read-error "unexpected \")\"$"))
+(define exception:unterminated-block-comment
+  (cons 'read-error "unterminated `#! ... !#' comment$"))
+(define exception:unknown-character-name
+  (cons 'read-error "unknown character name .*$"))
+(define exception:unknown-sharp-object
+  (cons 'read-error "Unknown # object: .*$"))
+(define exception:eof-in-string
+  (cons 'read-error "end of file in string constant$"))
+(define exception:illegal-escape
+  (cons 'read-error "illegal character in escape sequence: .*$"))
+
 
 (define (read-string s)
   (with-input-from-string s (lambda () (read))))
 
+(define (with-read-options opts thunk)
+  (let ((saved-options (read-options)))
+    (dynamic-wind
+        (lambda ()
+          (read-options opts))
+        thunk
+        (lambda ()
+          (read-options saved-options)))))
+
+
 (with-test-prefix "reading"
   (pass-if "0"
     (equal? (read-string "0") 0))
@@ -31,8 +71,18 @@
 	   (lambda (key subr message args rest)
 	     (apply format #f message args)
 	     ;; message and args are ok
-	     #t))))
+	     #t)))
 
+  (pass-if "block comment"
+    (equal? '(+ 1 2 3)
+            (read-string "(+ 1 #! this is a\ncomment !# 2 3)")))
+
+  (pass-if "unprintable symbol"
+    ;; The reader tolerates unprintable characters for symbols.
+    (equal? (string->symbol "\001\002\003")
+            (read-string "\001\002\003"))))
+
+
 (pass-if-exception "radix passed to number->string can't be zero"
   exception:out-of-range
   (number->string 10 0))
@@ -40,6 +90,7 @@
   exception:out-of-range
   (number->string 10 1))
 
+
 (with-test-prefix "mismatching parentheses"
   (pass-if-exception "opening parenthesis"
     exception:eof
@@ -53,3 +104,53 @@
   (pass-if-exception "closing parenthesis following mismatched vector opening"
      exception:unexpected-rparen
      (read-string ")")))
+
+
+(with-test-prefix "exceptions"
+
+  ;; Reader exceptions: although they are not documented, they may be relied
+  ;; on by some programs, hence these tests.
+
+  (pass-if-exception "unterminated block comment"
+    exception:unterminated-block-comment
+    (read-string "(+ 1 #! comment\n..."))
+  (pass-if-exception "unknown character name"
+    exception:unknown-character-name
+    (read-string "#\\theunknowncharacter"))
+  (pass-if-exception "unknown sharp object"
+    exception:unknown-sharp-object
+    (read-string "#?"))
+  (pass-if-exception "eof in string"
+    exception:eof-in-string
+    (read-string "\"the string that never ends"))
+  (pass-if-exception "illegal escape in string"
+    exception:illegal-escape
+    (read-string "\"some string \\???\"")))
+
+
+(with-test-prefix "read-options"
+  (pass-if "case-sensitive"
+    (not (eq? 'guile 'GuiLe)))
+  (pass-if "case-insensitive"
+    (eq? 'guile
+         (with-read-options '(case-insensitive)
+           (lambda ()
+             (read-string "GuiLe")))))
+  (pass-if "prefix keywords"
+    (eq? #:keyword
+         (with-read-options '(keywords prefix case-insensitive)
+           (lambda ()
+             (read-string ":KeyWord")))))
+  (pass-if "no positions"
+    (let ((sexp (with-read-options '()
+                  (lambda ()
+                    (read-string "(+ 1 2 3)")))))
+      (and (not (source-property sexp 'line))
+           (not (source-property sexp 'column)))))
+  (pass-if "positions"
+    (let ((sexp (with-read-options '(positions)
+                  (lambda ()
+                    (read-string "(+ 1 2 3)")))))
+      (and (equal? (source-property sexp 'line) 0)
+           (equal? (source-property sexp 'column) 0)))))
+