--- orig/ChangeLog +++ mod/ChangeLog @@ -1,3 +1,7 @@ +2007-07-22 Ludovic Courtès + + * configure.in: Check for and `strncasecmp ()'. + 2007-07-19 Ludovic Courtès * NEWS: Mention `(ice-9 i18n)' and lazy duplicate binding --- orig/configure.in +++ mod/configure.in @@ -546,7 +546,7 @@ regex.h rxposix.h rx/rxposix.h sys/dir.h sys/ioctl.h sys/select.h \ sys/time.h sys/timeb.h sys/times.h sys/stdtypes.h sys/types.h \ sys/utime.h time.h unistd.h utime.h pwd.h grp.h sys/utsname.h \ -direct.h langinfo.h nl_types.h]) +strings.h direct.h langinfo.h nl_types.h]) # "complex double" is new in C99, and "complex" is only a keyword if # is included @@ -638,7 +638,7 @@ # strcoll_l, newlocale - GNU extensions (glibc), also available on Darwin # nl_langinfo - X/Open, not available on Windows. # -AC_CHECK_FUNCS([DINFINITY DQNAN chsize clog10 ctermid fesetround ftime ftruncate fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice pipe _pipe readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex truncate unsetenv isblank _NSGetEnviron strcoll strcoll_l newlocale nl_langinfo]) +AC_CHECK_FUNCS([DINFINITY DQNAN chsize clog10 ctermid fesetround ftime ftruncate fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice pipe _pipe readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex truncate unsetenv isblank _NSGetEnviron strncasecmp strcoll strcoll_l newlocale nl_langinfo]) # Reasons for testing: # netdb.h - not in mingw --- orig/libguile/ChangeLog +++ mod/libguile/ChangeLog @@ -1,3 +1,18 @@ +2007-07-22 Ludovic Courtès + + Overhauled the reader, making it faster. + + * gdbint.c (tok_buf, tok_buf_mark_p): Removed. + (gdb_read): Don't use a token buffer. Use `scm_read ()' instead + of `scm_lreadr ()'. + + * read.c: Overhauled. No longer use a token buffer. Use a + on-stack C buffer in the common case and use Scheme strings when + larger buffers are needed. + * read.h (scm_grow_tok_buf, scm_flush_ws, scm_casei_streq, + scm_lreadr, scm_lreadrecparen): Removed. + (scm_i_input_error): Marked as `SCM_NORETURN'. + 2007-07-15 Ludovic Courtès * script.c (scm_compile_shell_switches): Updated copyright year. --- orig/libguile/gdbint.c +++ mod/libguile/gdbint.c @@ -103,9 +103,6 @@ static SCM gdb_input_port; static int port_mark_p, stream_mark_p, string_mark_p; -static SCM tok_buf; -static int tok_buf_mark_p; - static SCM gdb_output_port; @@ -184,10 +181,9 @@ scm_puts (str, gdb_input_port); scm_truncate_file (gdb_input_port, SCM_UNDEFINED); scm_seek (gdb_input_port, SCM_INUM0, scm_from_int (SEEK_SET)); + /* Read one object */ - tok_buf_mark_p = SCM_GC_MARK_P (tok_buf); - SCM_CLEAR_GC_MARK (tok_buf); - ans = scm_lreadr (&tok_buf, gdb_input_port, &ans); + ans = scm_read (gdb_input_port); if (SCM_GC_P) { if (SCM_NIMP (ans)) @@ -202,8 +198,6 @@ if (SCM_NIMP (ans)) scm_permanent_object (ans); exit: - if (tok_buf_mark_p) - SCM_SET_GC_MARK (tok_buf); remark_port (gdb_input_port); SCM_END_FOREIGN_BLOCK; return status; @@ -292,8 +286,6 @@ SCM_OPN | SCM_RDNG | SCM_WRTNG, s); gdb_input_port = scm_permanent_object (port); - - tok_buf = scm_permanent_object (scm_c_make_string (30, SCM_UNDEFINED)); } /* --- orig/libguile/read.c +++ mod/libguile/read.c @@ -19,7 +19,17 @@ +#ifdef HAVE_CONFIG_H +# include +#endif + #include +#include +#include +#ifdef HAVE_STRINGS_H +# include +#endif + #include "libguile/_scm.h" #include "libguile/chars.h" #include "libguile/eval.h" @@ -36,6 +46,7 @@ #include "libguile/vectors.h" #include "libguile/validate.h" #include "libguile/srfi-4.h" +#include "libguile/srfi-13.h" #include "libguile/read.h" #include "libguile/private-options.h" @@ -124,77 +135,114 @@ /* An association list mapping extra hash characters to procedures. */ static SCM *scm_read_hash_procedures; -SCM_DEFINE (scm_read, "read", 0, 1, 0, - (SCM port), - "Read an s-expression from the input port @var{port}, or from\n" - "the current input port if @var{port} is not specified.\n" - "Any whitespace before the next token is discarded.") -#define FUNC_NAME s_scm_read -{ - int c; - SCM tok_buf, copy; - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (1, port); + +/* Token readers. */ - c = scm_flush_ws (port, (char *) NULL); - if (EOF == c) - return SCM_EOF_VAL; - scm_ungetc (c, port); - tok_buf = scm_c_make_string (30, SCM_UNDEFINED); - return scm_lreadr (&tok_buf, port, ©); -} -#undef FUNC_NAME +/* Size of the C buffer used to read symbols and numbers. */ +#define READER_BUFFER_SIZE 128 +/* Size of the C buffer used to read strings. */ +#define READER_STRING_BUFFER_SIZE 512 +/* The maximum size of Scheme character names. */ +#define READER_CHAR_NAME_MAX_SIZE 50 + + +/* `isblank' is only in C99. */ +#define CHAR_IS_BLANK_(_chr) \ + (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \ + || ((_chr) == '\f')) + +#ifdef MSDOS +# define CHAR_IS_BLANK(_chr) \ + ((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26)) +#else +# define CHAR_IS_BLANK CHAR_IS_BLANK_ +#endif + + +/* R5RS one-character delimiters (see section 7.1.1, ``Lexical + structure''). */ +#define CHAR_IS_R5RS_DELIMITER(c) \ + (CHAR_IS_BLANK (c) \ + || (c == ')') || (c == '(') || (c == ';') || (c == '"')) + +#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER + +/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical + Structure''. */ +#define CHAR_IS_EXPONENT_MARKER(_chr) \ + (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \ + || ((_chr) == 'd') || ((_chr) == 'l')) + +/* An inlinable version of `scm_c_downcase ()'. */ +#define CHAR_DOWNCASE(_chr) \ + (((_chr) <= UCHAR_MAX) ? tolower (_chr) : (_chr)) -char * -scm_grow_tok_buf (SCM *tok_buf) -{ - size_t oldlen = scm_i_string_length (*tok_buf); - const char *olddata = scm_i_string_chars (*tok_buf); - char *newdata; - SCM newstr = scm_i_make_string (2 * oldlen, &newdata); - size_t i; - for (i = 0; i != oldlen; ++i) - newdata[i] = olddata[i]; +#ifndef HAVE_STRNCASECMP +/* XXX: Use Gnulib's `strncasecmp ()'. */ + +static int +strncasecmp (const char *s1, const char *s2, size_t len2) +{ + while (*s1 && *s2 && len2 > 0) + { + int c1 = *s1, c2 = *s2; - *tok_buf = newstr; - return newdata; + if (CHAR_DOWNCASE (c1) != CHAR_DOWNCASE (c2)) + return 0; + else + { + ++s1; + ++s2; + --len2; + } + } + return !(*s1 || *s2 || len2 > 0); } +#endif -/* Consume an SCSH-style block comment. Assume that we've already - read the initial `#!', and eat characters until we get a - exclamation-point/sharp-sign sequence. -*/ -static void -skip_scsh_block_comment (SCM port) +/* Helper function similar to `scm_read_token ()'. Read from PORT until a + whitespace is read. Return zero if the whole token could fit in BUF, + non-zero otherwise. */ +static inline int +read_token (SCM port, char *buf, size_t buf_size, size_t *read) { - int bang_seen = 0; + *read = 0; - for (;;) + while (*read < buf_size) { - int c = scm_getc (port); - - if (c == EOF) - scm_i_input_error ("skip_block_comment", port, - "unterminated `#! ... !#' comment", SCM_EOL); + int chr; - if (c == '!') - bang_seen = 1; - else if (c == '#' && bang_seen) - return; + chr = scm_getc (port); + chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr); + + if (chr == EOF) + return 0; + else if (CHAR_IS_DELIMITER (chr)) + { + scm_ungetc (chr, port); + return 0; + } else - bang_seen = 0; + { + *buf = (char) chr; + buf++, (*read)++; + } } + + return 1; } -int -scm_flush_ws (SCM port, const char *eoferr) + +/* Skip whitespace from PORT and return the first non-whitespace character + read. Raise an error on end-of-file. */ +static int +flush_ws (SCM port, const char *eoferr) { register int c; while (1) @@ -210,6 +258,7 @@ SCM_EOL); } return c; + case ';': lp: switch (c = scm_getc (port)) @@ -222,675 +271,879 @@ break; } break; - case '#': - switch (c = scm_getc (port)) - { - case EOF: - eoferr = "read_sharp"; - goto goteof; - case '!': - skip_scsh_block_comment (port); - break; - default: - scm_ungetc (c, port); - return '#'; - } - break; + case SCM_LINE_INCREMENTORS: case SCM_SINGLE_SPACES: case '\t': break; + default: return c; } + + return 0; } + +/* Token readers. */ -int -scm_casei_streq (char *s1, char *s2) -{ - while (*s1 && *s2) - if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2)) - return 0; - else - { - ++s1; - ++s2; - } - return !(*s1 || *s2); -} +static SCM scm_read_expression (SCM port); +static SCM scm_read_sharp (int chr, SCM port); +static SCM scm_get_hash_procedure (int c); +static SCM recsexpr (SCM obj, long line, int column, SCM filename); -static int -scm_i_casei_streq (const char *s1, const char *s2, size_t len2) -{ - while (*s1 && len2 > 0) - if (scm_c_downcase((int)*s1) != scm_c_downcase((int)*s2)) - return 0; - else - { - ++s1; - ++s2; - --len2; - } - return !(*s1 || len2 > 0); -} -/* recsexpr is used when recording expressions - * constructed by read:sharp. - */ static SCM -recsexpr (SCM obj, long line, int column, SCM filename) +scm_read_sexp (int chr, SCM port) +#define FUNC_NAME "scm_i_lreadparen" { - if (!scm_is_pair(obj)) { - return obj; - } else { - SCM tmp = obj, copy; - /* If this sexpr is visible in the read:sharp source, we want to - keep that information, so only record non-constant cons cells - which haven't previously been read by the reader. */ - if (scm_is_false (scm_whash_lookup (scm_source_whash, obj))) - { - if (SCM_COPY_SOURCE_P) - { - copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename), - SCM_UNDEFINED); - while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp)) - { - SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp), - line, - column, - filename), - SCM_UNDEFINED)); - copy = SCM_CDR (copy); - } - SCM_SETCDR (copy, tmp); - } - else - { - recsexpr (SCM_CAR (obj), line, column, filename); - while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp)) - recsexpr (SCM_CAR (tmp), line, column, filename); - copy = SCM_UNDEFINED; - } - scm_whash_insert (scm_source_whash, - obj, - scm_make_srcprops (line, - column, - filename, - copy, - SCM_EOL)); - } - return obj; - } -} + register int c; + register SCM tmp; + register SCM tl, ans = SCM_EOL; + SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;; + static const int terminating_char = ')'; + + /* Need to capture line and column numbers here. */ + long line = SCM_LINUM (port); + int column = SCM_COL (port) - 1; -static SCM scm_get_hash_procedure(int c); -static SCM scm_i_lreadparen (SCM *, SCM, char *, SCM *, char); + c = flush_ws (port, FUNC_NAME); + if (terminating_char == c) + return SCM_EOL; -static char s_list[]="list"; -#if SCM_ENABLE_ELISP -static char s_vector[]="vector"; -#endif + scm_ungetc (c, port); + if (scm_is_eq (scm_sym_dot, + (tmp = scm_read_expression (port)))) + { + ans = scm_read_expression (port); + if (terminating_char != (c = flush_ws (port, FUNC_NAME))) + scm_i_input_error (FUNC_NAME, port, "missing close paren", + SCM_EOL); + return ans; + } -SCM -scm_lreadr (SCM *tok_buf, SCM port, SCM *copy) -#define FUNC_NAME "scm_lreadr" -{ - int c; - size_t j; - SCM p; - - tryagain: - c = scm_flush_ws (port, s_scm_read); - switch (c) + /* Build the head of the list structure. */ + ans = tl = scm_cons (tmp, SCM_EOL); + + if (SCM_COPY_SOURCE_P) + ans2 = tl2 = scm_cons (scm_is_pair (tmp) + ? copy + : tmp, + SCM_EOL); + + while (terminating_char != (c = flush_ws (port, FUNC_NAME))) { - case EOF: - return SCM_EOF_VAL; + SCM new_tail; - case '(': - return SCM_RECORD_POSITIONS_P - ? scm_lreadrecparen (tok_buf, port, s_list, copy) - : scm_i_lreadparen (tok_buf, port, s_list, copy, ')'); - case ')': - scm_i_input_error (FUNC_NAME, port,"unexpected \")\"", SCM_EOL); - goto tryagain; - -#if SCM_ENABLE_ELISP - case '[': - if (SCM_ELISP_VECTORS_P) - { - p = scm_i_lreadparen (tok_buf, port, s_vector, copy, ']'); - return scm_is_null (p) ? scm_nullvect : scm_vector (p); - } - goto read_token; -#endif - case '\'': - p = scm_sym_quote; - goto recquote; - case '`': - p = scm_sym_quasiquote; - goto recquote; - case ',': - c = scm_getc (port); - if ('@' == c) - p = scm_sym_uq_splicing; - else + scm_ungetc (c, port); + if (scm_is_eq (scm_sym_dot, + (tmp = scm_read_expression (port)))) { - scm_ungetc (c, port); - p = scm_sym_unquote; + SCM_SETCDR (tl, tmp = scm_read_expression (port)); + + if (SCM_COPY_SOURCE_P) + SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp, + SCM_EOL)); + + c = flush_ws (port, FUNC_NAME); + if (terminating_char != c) + scm_i_input_error (FUNC_NAME, port, + "in pair: missing close paren", SCM_EOL); + goto exit; } - recquote: - p = scm_cons2 (p, - scm_lreadr (tok_buf, port, copy), - SCM_EOL); - if (SCM_RECORD_POSITIONS_P) - scm_whash_insert (scm_source_whash, - p, - scm_make_srcprops (SCM_LINUM (port), - SCM_COL (port) - 1, - SCM_FILENAME (port), - SCM_COPY_SOURCE_P - ? (*copy = scm_cons2 (SCM_CAR (p), - SCM_CAR (SCM_CDR (p)), - SCM_EOL)) - : SCM_UNDEFINED, - SCM_EOL)); - return p; - case '#': - c = scm_getc (port); - { - /* Check for user-defined hash procedure first, to allow - overriding of builtin hash read syntaxes. */ - SCM sharp = scm_get_hash_procedure (c); - if (scm_is_true (sharp)) - { - long line = SCM_LINUM (port); - int column = SCM_COL (port) - 2; - SCM got; - - got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port); - if (scm_is_eq (got, SCM_UNSPECIFIED)) - goto handle_sharp; - if (SCM_RECORD_POSITIONS_P) - return *copy = recsexpr (got, line, column, - SCM_FILENAME (port)); - else - return got; - } - } - handle_sharp: - switch (c) + new_tail = scm_cons (tmp, SCM_EOL); + SCM_SETCDR (tl, new_tail); + tl = new_tail; + + if (SCM_COPY_SOURCE_P) { - /* Vector, arrays, both uniform and not are handled by this - one function. It also disambiguates between '#f' and - '#f32' and '#f64'. - */ - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'u': case 's': case 'f': - case '@': - case '(': -#if SCM_ENABLE_DEPRECATED - /* See below for 'i' and 'e'. */ - case 'a': - case 'c': - case 'y': - case 'h': - case 'l': -#endif - return scm_i_read_array (port, c); + SCM new_tail2 = scm_cons (scm_is_pair (tmp) + ? copy + : tmp, SCM_EOL); + SCM_SETCDR (tl2, new_tail2); + tl2 = new_tail2; + } + } - case 't': - case 'T': - return SCM_BOOL_T; + exit: + if (SCM_RECORD_POSITIONS_P) + scm_whash_insert (scm_source_whash, + ans, + scm_make_srcprops (line, column, + SCM_FILENAME (port), + SCM_COPY_SOURCE_P + ? ans2 + : SCM_UNDEFINED, + SCM_EOL)); + return ans; +} +#undef FUNC_NAME - case 'F': - /* See above for lower case 'f'. */ - return SCM_BOOL_F; +static SCM +scm_read_string (int chr, SCM port) +#define FUNC_NAME "scm_lreadr" +{ + /* For strings smaller than C_STR, this function creates only one Scheme + object (the string returned). */ + SCM str = SCM_BOOL_F; + char c_str[READER_STRING_BUFFER_SIZE]; + unsigned c_str_len = 0; + int c; - case 'i': - case 'e': -#if SCM_ENABLE_DEPRECATED - { - /* When next char is '(', it really is an old-style - uniform array. */ - int next_c = scm_getc (port); - if (next_c != EOF) - scm_ungetc (next_c, port); - if (next_c == '(') - return scm_i_read_array (port, c); - /* Fall through. */ - } -#endif - case 'b': - case 'B': - case 'o': - case 'O': - case 'd': - case 'D': - case 'x': - case 'X': - case 'I': - case 'E': - scm_ungetc (c, port); - c = '#'; - goto num; - - case '!': - /* should never happen, #!...!# block comments are skipped - over in scm_flush_ws. */ - abort (); - - case '*': - j = scm_read_token (c, tok_buf, port, 0); - p = scm_istr2bve (scm_c_substring_shared (*tok_buf, 1, j)); - if (scm_is_true (p)) - return p; - else - goto unkshrp; + while ('"' != (c = scm_getc (port))) + { + if (c == EOF) + str_eof: scm_i_input_error (FUNC_NAME, port, + "end of file in string constant", + SCM_EOL); - case '{': - j = scm_read_token (c, tok_buf, port, 1); - return scm_string_to_symbol (scm_c_substring_copy (*tok_buf, 0, j)); - - case '\\': - c = scm_getc (port); - j = scm_read_token (c, tok_buf, port, 0); - if (j == 1) - return SCM_MAKE_CHAR (c); - if (c >= '0' && c < '8') - { - /* Dirk:FIXME:: This type of character syntax is not R5RS - * compliant. Further, it should be verified that the constant - * does only consist of octal digits. Finally, it should be - * checked whether the resulting fixnum is in the range of - * characters. */ - p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf), - j, 8); - if (SCM_I_INUMP (p)) - return SCM_MAKE_CHAR (SCM_I_INUM (p)); - } - for (c = 0; c < scm_n_charnames; c++) - if (scm_charnames[c] - && (scm_i_casei_streq (scm_charnames[c], - scm_i_string_chars (*tok_buf), j))) - return SCM_MAKE_CHAR (scm_charnums[c]); - scm_i_input_error (FUNC_NAME, port, "unknown character name ~a", - scm_list_1 (scm_c_substring (*tok_buf, 0, j))); + if (c_str_len + 1 >= sizeof (c_str)) + { + /* Flush the C buffer onto a Scheme string. */ + SCM addy; - /* #:SYMBOL is a syntax for keywords supported in all contexts. */ - case ':': - return scm_symbol_to_keyword (scm_read (port)); + if (str == SCM_BOOL_F) + str = scm_c_make_string (0, SCM_MAKE_CHAR ('X')); - default: - callshrp: - { - SCM sharp = scm_get_hash_procedure (c); + addy = scm_from_locale_stringn (c_str, c_str_len); + str = scm_string_append_shared (scm_list_2 (str, addy)); - if (scm_is_true (sharp)) - { - long line = SCM_LINUM (port); - int column = SCM_COL (port) - 2; - SCM got; - - got = scm_call_2 (sharp, SCM_MAKE_CHAR (c), port); - if (scm_is_eq (got, SCM_UNSPECIFIED)) - goto unkshrp; - if (SCM_RECORD_POSITIONS_P) - return *copy = recsexpr (got, line, column, - SCM_FILENAME (port)); - else - return got; - } - } - unkshrp: - scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S", - scm_list_1 (SCM_MAKE_CHAR (c))); + c_str_len = 0; } - case '"': - j = 0; - while ('"' != (c = scm_getc (port))) - { - if (c == EOF) - str_eof: scm_i_input_error (FUNC_NAME, port, - "end of file in string constant", - SCM_EOL); - - while (j + 2 >= scm_i_string_length (*tok_buf)) - scm_grow_tok_buf (tok_buf); - - if (c == '\\') - switch (c = scm_getc (port)) - { - case EOF: - goto str_eof; - case '"': - case '\\': - break; + if (c == '\\') + switch (c = scm_getc (port)) + { + case EOF: + goto str_eof; + case '"': + case '\\': + break; #if SCM_ENABLE_ELISP - case '(': - case ')': - if (SCM_ESCAPED_PARENS_P) - break; - goto bad_escaped; + case '(': + case ')': + if (SCM_ESCAPED_PARENS_P) + break; + goto bad_escaped; #endif - case '\n': - continue; - case '0': - c = '\0'; - break; - case 'f': - c = '\f'; - break; - case 'n': - c = '\n'; - break; - case 'r': - c = '\r'; - break; - case 't': - c = '\t'; - break; - case 'a': - c = '\007'; - break; - case 'v': - c = '\v'; - break; - case 'x': - { - int a, b; - a = scm_getc (port); - if (a == EOF) goto str_eof; - b = scm_getc (port); - if (b == EOF) goto str_eof; - if ('0' <= a && a <= '9') a -= '0'; - else if ('A' <= a && a <= 'F') a = a - 'A' + 10; - else if ('a' <= a && a <= 'f') a = a - 'a' + 10; - else goto bad_escaped; - if ('0' <= b && b <= '9') b -= '0'; - else if ('A' <= b && b <= 'F') b = b - 'A' + 10; - else if ('a' <= b && b <= 'f') b = b - 'a' + 10; - else goto bad_escaped; - c = a * 16 + b; - break; - } - default: - bad_escaped: - scm_i_input_error(FUNC_NAME, port, - "illegal character in escape sequence: ~S", - scm_list_1 (SCM_MAKE_CHAR (c))); - } - scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c)); - ++j; - } - if (j == 0) - return scm_nullstr; - - /* Change this to scm_c_substring_read_only when - SCM_STRING_CHARS has been removed. - */ - return scm_c_substring_copy (*tok_buf, 0, j); - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '.': - case '-': - case '+': - num: - j = scm_read_token (c, tok_buf, port, 0); - if (j == 1 && (c == '+' || c == '-')) - /* Shortcut: Detected symbol '+ or '- */ - goto tok; - - p = scm_c_locale_stringn_to_number (scm_i_string_chars (*tok_buf), j, 10); - if (scm_is_true (p)) - return p; - if (c == '#') - { - if ((j == 2) && (scm_getc (port) == '(')) + case '\n': + continue; + case '0': + c = '\0'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'a': + c = '\007'; + break; + case 'v': + c = '\v'; + break; + case 'x': { - scm_ungetc ('(', port); - c = scm_i_string_chars (*tok_buf)[1]; - goto callshrp; + int a, b; + a = scm_getc (port); + if (a == EOF) goto str_eof; + b = scm_getc (port); + if (b == EOF) goto str_eof; + if ('0' <= a && a <= '9') a -= '0'; + else if ('A' <= a && a <= 'F') a = a - 'A' + 10; + else if ('a' <= a && a <= 'f') a = a - 'a' + 10; + else goto bad_escaped; + if ('0' <= b && b <= '9') b -= '0'; + else if ('A' <= b && b <= 'F') b = b - 'A' + 10; + else if ('a' <= b && b <= 'f') b = b - 'a' + 10; + else goto bad_escaped; + c = a * 16 + b; + break; } - scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL); - } - goto tok; - - case ':': - if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix)) - return scm_symbol_to_keyword (scm_read (port)); + default: + bad_escaped: + scm_i_input_error (FUNC_NAME, port, + "illegal character in escape sequence: ~S", + scm_list_1 (SCM_MAKE_CHAR (c))); + } + c_str[c_str_len++] = c; + } - /* fallthrough */ - default: -#if SCM_ENABLE_ELISP - read_token: -#endif - j = scm_read_token (c, tok_buf, port, 0); - /* fallthrough */ + if (c_str_len > 0) + { + SCM addy; - tok: - return scm_string_to_symbol (scm_c_substring (*tok_buf, 0, j)); + addy = scm_from_locale_stringn (c_str, c_str_len); + if (str == SCM_BOOL_F) + str = addy; + else + str = scm_string_append_shared (scm_list_2 (str, addy)); } + else + str = (str == SCM_BOOL_F) ? scm_nullstr : str; + + return str; } #undef FUNC_NAME -#ifdef _UNICOS -_Pragma ("noopt"); /* # pragma _CRI noopt */ -#endif - -size_t -scm_read_token (int ic, SCM *tok_buf, SCM port, int weird) +static SCM +scm_read_number (int chr, SCM port) { - size_t j; - int c; + SCM result, str = SCM_EOL; + char buffer[READER_BUFFER_SIZE]; + size_t read; + int overflow = 0; - c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(ic) : ic); - - if (weird) - j = 0; - else + scm_ungetc (chr, port); + do { - j = 0; - while (j + 2 >= scm_i_string_length (*tok_buf)) - scm_grow_tok_buf (tok_buf); - scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c)); - ++j; + overflow = read_token (port, buffer, sizeof (buffer), &read); + + if ((overflow) || (scm_is_pair (str))) + str = scm_cons (scm_from_locale_stringn (buffer, read), str); } + while (overflow); - while (1) + if (scm_is_pair (str)) { - while (j + 2 >= scm_i_string_length (*tok_buf)) - scm_grow_tok_buf (tok_buf); - c = scm_getc (port); - switch (c) - { - case '(': - case ')': -#if SCM_ENABLE_ELISP - case '[': - case ']': -#endif - case '"': - case ';': - case SCM_WHITE_SPACES: - case SCM_LINE_INCREMENTORS: - if (weird -#if SCM_ENABLE_ELISP - || ((!SCM_ELISP_VECTORS_P) && ((c == '[') || (c == ']'))) -#endif - ) - goto default_case; + /* The slow path. */ - scm_ungetc (c, port); - case EOF: - eof_case: - return j; - case '\\': - if (!weird) - goto default_case; - else - { - c = scm_getc (port); - if (c == EOF) - goto eof_case; - else - goto default_case; - } - case '}': - if (!weird) - goto default_case; + str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL)); + result = scm_string_to_number (str, SCM_UNDEFINED); + if (!scm_is_true (result)) + /* Return a symbol instead of a number. */ + result = scm_string_to_symbol (str); + } + else + { + result = scm_c_locale_stringn_to_number (buffer, read, 10); + if (!scm_is_true (result)) + /* Return a symbol instead of a number. */ + result = scm_from_locale_symboln (buffer, read); + } - c = scm_getc (port); - if (c == '#') - { - return j; - } - else - { - scm_ungetc (c, port); - c = '}'; - goto default_case; - } + return result; +} - default: - default_case: +static SCM +scm_read_mixed_case_symbol (int chr, SCM port) +{ + SCM result, str = SCM_EOL; + int overflow = 0; + char buffer[READER_BUFFER_SIZE]; + size_t read = 0; + + scm_ungetc (chr, port); + do + { + overflow = read_token (port, buffer, sizeof (buffer), &read); + + if ((overflow) || (scm_is_pair (str))) + str = scm_cons (scm_from_locale_stringn (buffer, read), str); + } + while (overflow); + + if (scm_is_pair (str)) + { + str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL)); + result = scm_string_to_symbol (str); + } + else + /* For symbols smaller than `sizeof (buffer)', we don't need to recur to + Scheme strings. Therefore, we only create one Scheme object (a + symbol) per symbol read. */ + result = scm_from_locale_symboln (buffer, read); + + return result; +} + +static SCM +scm_read_number_and_radix (int chr, SCM port) +#define FUNC_NAME "scm_lreadr" +{ + SCM result, str = SCM_EOL; + size_t read; + char buffer[READER_BUFFER_SIZE]; + unsigned int radix; + int overflow = 0; + + switch (chr) + { + case 'B': + case 'b': + radix = 2; + break; + + case 'o': + case 'O': + radix = 8; + break; + + case 'd': + case 'D': + radix = 10; + break; + + case 'x': + case 'X': + radix = 16; + break; + + default: + scm_ungetc (chr, port); + scm_ungetc ('#', port); + radix = 10; + } + + do + { + overflow = read_token (port, buffer, sizeof (buffer), &read); + + if ((overflow) || (scm_is_pair (str))) + str = scm_cons (scm_from_locale_stringn (buffer, read), str); + } + while (overflow); + + if (scm_is_pair (str)) + { + str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL)); + result = scm_string_to_number (str, scm_from_uint (radix)); + } + else + result = scm_c_locale_stringn_to_number (buffer, read, radix); + + if (scm_is_true (result)) + return result; + + scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL); + + return SCM_BOOL_F; +} +#undef FUNC_NAME + +static SCM +scm_read_quote (int chr, SCM port) +{ + SCM p; + + switch (chr) + { + case '`': + p = scm_sym_quasiquote; + break; + + case '\'': + p = scm_sym_quote; + break; + + case ',': + { + int c; + + c = scm_getc (port); + if ('@' == c) + p = scm_sym_uq_splicing; + else { - c = (SCM_CASE_INSENSITIVE_P ? scm_c_downcase(c) : c); - scm_c_string_set_x (*tok_buf, j, SCM_MAKE_CHAR (c)); - ++j; + scm_ungetc (c, port); + p = scm_sym_unquote; } + break; + } - } + default: + fprintf (stderr, "%s: unhandled quote character (%i)\n", + __FUNCTION__, chr); + abort (); } -} -#ifdef _UNICOS -_Pragma ("opt"); /* # pragma _CRI opt */ -#endif + p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); -static SCM -scm_i_lreadparen (SCM *tok_buf, SCM port, char *name, SCM *copy, char term_char) -#define FUNC_NAME "scm_i_lreadparen" + return p; +} + +static inline SCM +scm_read_semicolon_comment (int chr, SCM port) { - SCM tmp; - SCM tl; - SCM ans; int c; - c = scm_flush_ws (port, name); - if (term_char == c) - return SCM_EOL; - scm_ungetc (c, port); - if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) + for (c = scm_getc (port); + (c != EOF) && (c != '\n'); + c = scm_getc (port)); + + return SCM_UNSPECIFIED; +} + + +/* Sharp readers, i.e. readers called after a `#' sign has been read. */ + +static SCM +scm_read_boolean (int chr, SCM port) +{ + switch (chr) { - ans = scm_lreadr (tok_buf, port, copy); - closeit: - if (term_char != (c = scm_flush_ws (port, name))) - scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); - return ans; + case 't': + case 'T': + return SCM_BOOL_T; + + case 'f': + case 'F': + return SCM_BOOL_F; } - ans = tl = scm_cons (tmp, SCM_EOL); - while (term_char != (c = scm_flush_ws (port, name))) + + return SCM_UNSPECIFIED; +} + +static SCM +scm_read_character (int chr, SCM port) +#define FUNC_NAME "scm_lreadr" +{ + unsigned c; + char charname[READER_CHAR_NAME_MAX_SIZE]; + size_t charname_len; + + if (read_token (port, charname, sizeof (charname), &charname_len)) + goto char_error; + + if (charname_len == 0) { - scm_ungetc (c, port); - if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) - { - SCM_SETCDR (tl, scm_lreadr (tok_buf, port, copy)); - goto closeit; - } - SCM_SETCDR (tl, scm_cons (tmp, SCM_EOL)); - tl = SCM_CDR (tl); + chr = scm_getc (port); + if (chr == EOF) + scm_i_input_error (FUNC_NAME, port, "unexpected end of file " + "while reading character", SCM_EOL); + + /* CHR must be a token delimiter, like a whitespace. */ + return (SCM_MAKE_CHAR (chr)); } - return ans; + + if (charname_len == 1) + return SCM_MAKE_CHAR (charname[0]); + + if (*charname >= '0' && *charname < '8') + { + /* Dirk:FIXME:: This type of character syntax is not R5RS + * compliant. Further, it should be verified that the constant + * does only consist of octal digits. Finally, it should be + * checked whether the resulting fixnum is in the range of + * characters. */ + SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8); + if (SCM_I_INUMP (p)) + return SCM_MAKE_CHAR (SCM_I_INUM (p)); + } + + for (c = 0; c < scm_n_charnames; c++) + if (scm_charnames[c] + && (!strncasecmp (scm_charnames[c], charname, charname_len))) + return SCM_MAKE_CHAR (scm_charnums[c]); + + char_error: + scm_i_input_error (FUNC_NAME, port, "unknown character name ~a", + scm_list_1 (scm_from_locale_stringn (charname, + charname_len))); + + return SCM_UNSPECIFIED; } #undef FUNC_NAME +static inline SCM +scm_read_keyword (int chr, SCM port) +{ + SCM symbol; + + /* Read the symbol that comprises the keyword. Doing this instead of + invoking a specific symbol reader function allows `scm_read_keyword ()' + to adapt to the delimiters currently valid of symbols. + + XXX: This implementation allows sloppy syntaxes like `#: key'. */ + symbol = scm_read_expression (port); + if (!scm_is_symbol (symbol)) + scm_i_input_error (__FUNCTION__, port, + "keyword prefix `~a' not followed by a symbol: ~s", + scm_list_2 (SCM_MAKE_CHAR (chr), symbol)); -SCM -scm_lreadrecparen (SCM *tok_buf, SCM port, char *name, SCM *copy) -#define FUNC_NAME "scm_lreadrecparen" + return (scm_symbol_to_keyword (symbol)); +} + +static inline SCM +scm_read_vector (int chr, SCM port) { - register int c; - register SCM tmp; - register SCM tl, tl2 = SCM_EOL; - SCM ans, ans2 = SCM_EOL; - /* Need to capture line and column numbers here. */ - long line = SCM_LINUM (port); - int column = SCM_COL (port) - 1; + /* Note: We call `scm_read_sexp ()' rather than READER here in order to + guarantee that it's going to do what we want. After all, this is an + implementation detail of `scm_read_vector ()', not a desirable + property. */ + return (scm_vector (scm_read_sexp (chr, port))); +} - c = scm_flush_ws (port, name); - if (')' == c) - return SCM_EOL; - scm_ungetc (c, port); - if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) +static inline SCM +scm_read_srfi4_vector (int chr, SCM port) +{ + return scm_i_read_array (port, chr); +} + +static SCM +scm_read_guile_bit_vector (int chr, SCM port) +{ + /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is + terribly inefficient but who cares? */ + SCM s_bits = SCM_EOL; + + for (chr = scm_getc (port); + (chr != EOF) && ((chr == '0') || (chr == '1')); + chr = scm_getc (port)) { - ans = scm_lreadr (tok_buf, port, copy); - if (')' != (c = scm_flush_ws (port, name))) - scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); - return ans; + s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits); } - /* Build the head of the list structure. */ - ans = tl = scm_cons (tmp, SCM_EOL); - if (SCM_COPY_SOURCE_P) - ans2 = tl2 = scm_cons (scm_is_pair (tmp) - ? *copy - : tmp, - SCM_EOL); - while (')' != (c = scm_flush_ws (port, name))) + + if (chr != EOF) + scm_ungetc (chr, port); + + return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)); +} + +static inline SCM +scm_read_scsh_block_comment (int chr, SCM port) +{ + int bang_seen = 0; + + for (;;) { - SCM new_tail; + int c = scm_getc (port); - scm_ungetc (c, port); - if (scm_is_eq (scm_sym_dot, (tmp = scm_lreadr (tok_buf, port, copy)))) + if (c == EOF) + scm_i_input_error ("skip_block_comment", port, + "unterminated `#! ... !#' comment", SCM_EOL); + + if (c == '!') + bang_seen = 1; + else if (c == '#' && bang_seen) + break; + else + bang_seen = 0; + } + + return SCM_UNSPECIFIED; +} + +static SCM +scm_read_extended_symbol (int chr, SCM port) +{ + /* Guile's extended symbol read syntax looks like this: + + #{This is all a symbol name}# + + So here, CHR is expected to be `{'. */ + SCM result; + int saw_brace = 0, finished = 0; + size_t len = 0; + char buf[1024]; + + result = scm_c_make_string (0, SCM_MAKE_CHAR ('X')); + + while ((chr = scm_getc (port)) != EOF) + { + if (saw_brace) { - SCM_SETCDR (tl, tmp = scm_lreadr (tok_buf, port, copy)); - if (SCM_COPY_SOURCE_P) - SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) - ? *copy - : tmp, - SCM_EOL)); - if (')' != (c = scm_flush_ws (port, name))) - scm_i_input_error (FUNC_NAME, port, - "missing close paren", SCM_EOL); - goto exit; + if (chr == '#') + { + finished = 1; + break; + } + else + { + saw_brace = 0; + buf[len++] = '}'; + buf[len++] = chr; + } } + else if (chr == '}') + saw_brace = 1; + else + buf[len++] = chr; - new_tail = scm_cons (tmp, SCM_EOL); - SCM_SETCDR (tl, new_tail); - tl = new_tail; + if (len >= sizeof (buf) - 2) + { + scm_string_append (scm_list_2 (result, + scm_from_locale_stringn (buf, len))); + len = 0; + } - if (SCM_COPY_SOURCE_P) + if (finished) + break; + } + + if (len) + result = scm_string_append (scm_list_2 + (result, + scm_from_locale_stringn (buf, len))); + + return (scm_string_to_symbol (result)); +} + + + +/* Top-level token readers, i.e., dispatchers. */ + +static SCM +scm_read_sharp_extension (int chr, SCM port) +{ + SCM proc; + + proc = scm_get_hash_procedure (chr); + if (scm_is_true (scm_procedure_p (proc))) + { + long line = SCM_LINUM (port); + int column = SCM_COL (port) - 2; + SCM got; + + got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port); + if (!scm_is_eq (got, SCM_UNSPECIFIED)) { - SCM new_tail2 = scm_cons (scm_is_pair (tmp) ? *copy : tmp, SCM_EOL); - SCM_SETCDR (tl2, new_tail2); - tl2 = new_tail2; + if (SCM_RECORD_POSITIONS_P) + return (recsexpr (got, line, column, + SCM_FILENAME (port))); + else + return got; } } -exit: - scm_whash_insert (scm_source_whash, - ans, - scm_make_srcprops (line, - column, - SCM_FILENAME (port), - SCM_COPY_SOURCE_P - ? *copy = ans2 - : SCM_UNDEFINED, - SCM_EOL)); - return ans; + + return SCM_UNSPECIFIED; +} + +/* The reader for the sharp `#' character. It basically dispatches reads + among the above token readers. */ +static SCM +scm_read_sharp (int chr, SCM port) +#define FUNC_NAME "scm_lreadr" +{ + SCM result; + + chr = scm_getc (port); + + result = scm_read_sharp_extension (chr, port); + if (!scm_is_eq (result, SCM_UNSPECIFIED)) + return result; + + switch (chr) + { + case '\\': + return (scm_read_character (chr, port)); + case '(': + return (scm_read_vector (chr, port)); + case 's': + case 'u': + case 'f': + /* This one may return either a boolean or an SRFI-4 vector. */ + return (scm_read_srfi4_vector (chr, port)); + case '*': + return (scm_read_guile_bit_vector (chr, port)); + case 't': + case 'T': + case 'F': + /* This one may return either a boolean or an SRFI-4 vector. */ + return (scm_read_boolean (chr, port)); + case ':': + return (scm_read_keyword (chr, port)); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '@': +#if SCM_ENABLE_DEPRECATED + /* See below for 'i' and 'e'. */ + case 'a': + case 'c': + case 'y': + case 'h': + case 'l': +#endif + return (scm_i_read_array (port, chr)); + + case 'i': + case 'e': +#if SCM_ENABLE_DEPRECATED + { + /* When next char is '(', it really is an old-style + uniform array. */ + int next_c = scm_getc (port); + if (next_c != EOF) + scm_ungetc (next_c, port); + if (next_c == '(') + return scm_i_read_array (port, chr); + /* Fall through. */ + } +#endif + case 'b': + case 'B': + case 'o': + case 'O': + case 'd': + case 'D': + case 'x': + case 'X': + case 'I': + case 'E': + return (scm_read_number_and_radix (chr, port)); + case '{': + return (scm_read_extended_symbol (chr, port)); + case '!': + return (scm_read_scsh_block_comment (chr, port)); + default: + result = scm_read_sharp_extension (chr, port); + if (scm_is_eq (result, SCM_UNSPECIFIED)) + scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S", + scm_list_1 (SCM_MAKE_CHAR (chr))); + else + return result; + } + + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +static SCM +scm_read_expression (SCM port) +#define FUNC_NAME "scm_read_expression" +{ + while (1) + { + register int chr; + + chr = scm_getc (port); + + switch (chr) + { + case SCM_WHITE_SPACES: + case SCM_LINE_INCREMENTORS: + break; + case ';': + (void) scm_read_semicolon_comment (chr, port); + break; + case '(': + return (scm_read_sexp (chr, port)); + case '"': + return (scm_read_string (chr, port)); + case '\'': + case '`': + case ',': + return (scm_read_quote (chr, port)); + case '#': + { + SCM result; + result = scm_read_sharp (chr, port); + if (scm_is_eq (result, SCM_UNSPECIFIED)) + /* We read a comment or some such. */ + break; + else + return result; + } + case ')': + scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL); + break; + case EOF: + return SCM_EOF_VAL; + case ':': + if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix)) + return scm_symbol_to_keyword (scm_read_expression (port)); + /* Fall through. */ + + default: + { + if (((chr >= '0') && (chr <= '9')) + || (strchr ("+-.", chr))) + return (scm_read_number (chr, port)); + else + return (scm_read_mixed_case_symbol (chr, port)); + } + } + } +} +#undef FUNC_NAME + + +/* Actual reader. */ + +SCM_DEFINE (scm_read, "read", 0, 1, 0, + (SCM port), + "Read an s-expression from the input port @var{port}, or from\n" + "the current input port if @var{port} is not specified.\n" + "Any whitespace before the next token is discarded.") +#define FUNC_NAME s_scm_read +{ + int c; + + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (1, port); + + c = flush_ws (port, (char *) NULL); + if (EOF == c) + return SCM_EOF_VAL; + scm_ungetc (c, port); + + return (scm_read_expression (port)); } #undef FUNC_NAME +/* Used when recording expressions constructed by `scm_read_sharp ()'. */ +static SCM +recsexpr (SCM obj, long line, int column, SCM filename) +{ + if (!scm_is_pair(obj)) { + return obj; + } else { + SCM tmp = obj, copy; + /* If this sexpr is visible in the read:sharp source, we want to + keep that information, so only record non-constant cons cells + which haven't previously been read by the reader. */ + if (scm_is_false (scm_whash_lookup (scm_source_whash, obj))) + { + if (SCM_COPY_SOURCE_P) + { + copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename), + SCM_UNDEFINED); + while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp)) + { + SCM_SETCDR (copy, scm_cons (recsexpr (SCM_CAR (tmp), + line, + column, + filename), + SCM_UNDEFINED)); + copy = SCM_CDR (copy); + } + SCM_SETCDR (copy, tmp); + } + else + { + recsexpr (SCM_CAR (obj), line, column, filename); + while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp)) + recsexpr (SCM_CAR (tmp), line, column, filename); + copy = SCM_UNDEFINED; + } + scm_whash_insert (scm_source_whash, + obj, + scm_make_srcprops (line, + column, + filename, + copy, + SCM_EOL)); + } + return obj; + } +} + /* Manipulate the read-hash-procedures alist. This could be written in Scheme, but maybe it will also be used by C code during initialisation. */ SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0, --- orig/libguile/read.h +++ mod/libguile/read.h @@ -53,16 +53,12 @@ SCM_API SCM scm_read_options (SCM setting); SCM_API SCM scm_read (SCM port); -SCM_API char * scm_grow_tok_buf (SCM * tok_buf); -SCM_API int scm_flush_ws (SCM port, const char *eoferr); -SCM_API int scm_casei_streq (char * s1, char * s2); -SCM_API SCM scm_lreadr (SCM * tok_buf, SCM port, SCM *copy); SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird); -SCM_API SCM scm_lreadrecparen (SCM * tok_buf, SCM port, char *name, SCM *copy); SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc); SCM_API void scm_i_input_error (const char *func, SCM port, - const char *message, SCM arg); + const char *message, SCM arg) + SCM_NORETURN; SCM_API void scm_init_read (void); --- orig/test-suite/ChangeLog +++ mod/test-suite/ChangeLog @@ -1,3 +1,14 @@ +2007-07-22 Ludovic Courtès + + * tests/reader.test: Added a proper header and `define-module'. + (exception:unterminated-block-comment, + exception:unknown-character-name, + exception:unknown-sharp-object, exception:eof-in-string, + exception:illegal-escape, with-read-options): New. + (reading)[block comment, unprintable symbol]: New tests. + (exceptions): New test prefix. + (read-options): New test prefix. + 2007-07-18 Stephen Compall * tests/syntax.test: Add SRFI-61 `cond' tests. --- orig/test-suite/tests/reader.test +++ mod/test-suite/tests/reader.test @@ -1,15 +1,55 @@ -;;;; reader.test --- test the Guile parser -*- scheme -*- -;;;; Jim Blandy --- September 1999 +;;;; reader.test --- Exercise the reader. -*- Scheme -*- +;;;; +;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007 Free Software Foundation, Inc. +;;;; Jim Blandy +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 2.1 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +(define-module (test-suite reader) + :use-module (test-suite lib)) + (define exception:eof (cons 'read-error "end of file$")) - (define exception:unexpected-rparen (cons 'read-error "unexpected \")\"$")) +(define exception:unterminated-block-comment + (cons 'read-error "unterminated `#! ... !#' comment$")) +(define exception:unknown-character-name + (cons 'read-error "unknown character name .*$")) +(define exception:unknown-sharp-object + (cons 'read-error "Unknown # object: .*$")) +(define exception:eof-in-string + (cons 'read-error "end of file in string constant$")) +(define exception:illegal-escape + (cons 'read-error "illegal character in escape sequence: .*$")) + (define (read-string s) (with-input-from-string s (lambda () (read)))) +(define (with-read-options opts thunk) + (let ((saved-options (read-options))) + (dynamic-wind + (lambda () + (read-options opts)) + thunk + (lambda () + (read-options saved-options))))) + + (with-test-prefix "reading" (pass-if "0" (equal? (read-string "0") 0)) @@ -31,8 +71,18 @@ (lambda (key subr message args rest) (apply format #f message args) ;; message and args are ok - #t)))) + #t))) + (pass-if "block comment" + (equal? '(+ 1 2 3) + (read-string "(+ 1 #! this is a\ncomment !# 2 3)"))) + + (pass-if "unprintable symbol" + ;; The reader tolerates unprintable characters for symbols. + (equal? (string->symbol "\001\002\003") + (read-string "\001\002\003")))) + + (pass-if-exception "radix passed to number->string can't be zero" exception:out-of-range (number->string 10 0)) @@ -40,6 +90,7 @@ exception:out-of-range (number->string 10 1)) + (with-test-prefix "mismatching parentheses" (pass-if-exception "opening parenthesis" exception:eof @@ -53,3 +104,53 @@ (pass-if-exception "closing parenthesis following mismatched vector opening" exception:unexpected-rparen (read-string ")"))) + + +(with-test-prefix "exceptions" + + ;; Reader exceptions: although they are not documented, they may be relied + ;; on by some programs, hence these tests. + + (pass-if-exception "unterminated block comment" + exception:unterminated-block-comment + (read-string "(+ 1 #! comment\n...")) + (pass-if-exception "unknown character name" + exception:unknown-character-name + (read-string "#\\theunknowncharacter")) + (pass-if-exception "unknown sharp object" + exception:unknown-sharp-object + (read-string "#?")) + (pass-if-exception "eof in string" + exception:eof-in-string + (read-string "\"the string that never ends")) + (pass-if-exception "illegal escape in string" + exception:illegal-escape + (read-string "\"some string \\???\""))) + + +(with-test-prefix "read-options" + (pass-if "case-sensitive" + (not (eq? 'guile 'GuiLe))) + (pass-if "case-insensitive" + (eq? 'guile + (with-read-options '(case-insensitive) + (lambda () + (read-string "GuiLe"))))) + (pass-if "prefix keywords" + (eq? #:keyword + (with-read-options '(keywords prefix case-insensitive) + (lambda () + (read-string ":KeyWord"))))) + (pass-if "no positions" + (let ((sexp (with-read-options '() + (lambda () + (read-string "(+ 1 2 3)"))))) + (and (not (source-property sexp 'line)) + (not (source-property sexp 'column))))) + (pass-if "positions" + (let ((sexp (with-read-options '(positions) + (lambda () + (read-string "(+ 1 2 3)"))))) + (and (equal? (source-property sexp 'line) 0) + (equal? (source-property sexp 'column) 0))))) +