From d437af76ec52f2860d8d07630eb8abcf3443d563 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sat, 13 Oct 2012 23:02:05 -0400 Subject: [PATCH 3/4] Implement per-port reader options, #!fold-case and #!no-fold-case. * libguile/ports.c (scm_new_port_table_entry): Change initial values in 'scm_i_port_weak_hash' from SCM_BOOL_F to SCM_EOL, for use as an alist, where per-port reader options can be stored. * libguile/arrays.c (read_decimal_integer): Move to read.c. (scm_i_read_array): Remove. Incorporate the code into the 'scm_read_array' static function in read.c. * libguile/arrays.h (scm_i_read_array): Remove prototype. * libguile/read.c (scm_t_read_opts): New internal C struct type. (set_per_port_read_option, set_per_port_case_insensitive_p, init_read_options): New internal static functions. (CHAR_IS_R5RS_DELIMITER, CHAR_IS_DELIMITER): Move the '[' and ']' delimiters from CHAR_IS_R5RS_DELIMITER to CHAR_IS_DELIMITER. Consult 'opts' (assumed to be a local variable) to determine whether square brackets are delimiters. (scm_read): Call 'init_read_options' to initialize a local struct of type 'scm_t_read_opts'. A pointer to this struct is passed down to all reader helper functions that need it. (flush_ws, maybe_annotate_source, read_complete_token, read_token, scm_read_bytevector, scm_read_character, scm_read_commented_expression, scm_read_expression, scm_read_guile_bit_vector, scm_read_keyword, scm_read_mixed_case_symbol, scm_read_nil, scm_read_number, scm_read_number_and_radix, scm_read_quote, scm_read_sexp, scm_read_sharp, scm_read_sharp_extension, scm_read_shebang, scm_read_srfi4_vector, scm_read_string, scm_read_syntax, scm_read_vector): Add 'opts' as an additional parameter, and use it to look up read options. Previously the global read options were consulted directly. (read_decimal_integer): Move here from read.c. (scm_read_array): Add 'opts' as an additional parameter. Incorporate the code from 'scm_i_read_array'. Call 'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read', to avoid recomputing 'opts'. * doc/ref/api-evaluation.texi (Case Sensitivity, Scheme Read): Mention the existence of per-port reader options, and the reader directives #!fold-case and #!no-fold-case. * test-suite/tests/reader.test ("per-port-read-options"): Add tests. --- doc/ref/api-evaluation.texi | 23 +- libguile/arrays.c | 175 +------------ libguile/arrays.h | 4 +- libguile/ports.c | 5 +- libguile/read.c | 586 ++++++++++++++++++++++++++++++++---------- test-suite/tests/reader.test | 13 + 6 files changed, 488 insertions(+), 318 deletions(-) diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 6112832..9eccb39 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -254,6 +254,8 @@ Encoding of Source Files}. @node Case Sensitivity @subsubsection Case Sensitivity +@cindex fold-case +@cindex no-fold-case @c FIXME::martin: Review me! @@ -275,9 +277,9 @@ options, @xref{Scheme Read}. (read-enable 'case-insensitive) @end lisp -Note that this is seldom a problem, because Scheme programmers tend not -to use uppercase letters in their identifiers anyway. - +It is also possible to disable (or enable) case sensitivity within a +single file by placing the reader directives @code{#!fold-case} (or +@code{#!fold-case}) within the file itself. @node Keyword Syntax @subsubsection Keyword Syntax @@ -315,10 +317,10 @@ its read options. @cindex options - read @cindex read options @deffn {Scheme Procedure} read-options [setting] -Display the current settings of the read options. If @var{setting} is -omitted, only a short form of the current read options is printed. -Otherwise if @var{setting} is the symbol @code{help}, a complete options -description is displayed. +Display the current settings of the global read options. If +@var{setting} is omitted, only a short form of the current read options +is printed. Otherwise if @var{setting} is the symbol @code{help}, a +complete options description is displayed. @end deffn The set of available options, and their default values, may be had by @@ -338,6 +340,13 @@ hungry-eol-escapes no In strings, consume leading whitespace after an escaped end-of-line. @end smalllisp +Note that Guile also includes a preliminary mechanism for overriding +read options on a per-port basis. Currently, the only read option that +is overridden in this way is the @code{case-insensitive} option, which +is set or unset when the reader encounters the special directives +@code{#!fold-case} or @code{#!no-fold-case}. There is currently no +other way to access or set these per-port read options. + The boolean options may be toggled with @code{read-enable} and @code{read-disable}. The non-boolean @code{keywords} option must be set using @code{read-set!}. diff --git a/libguile/arrays.c b/libguile/arrays.c index a294f33..1eb10b9 100644 --- a/libguile/arrays.c +++ b/libguile/arrays.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1995,1996,1997,1998,2000,2001,2002,2003,2004, 2005, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001,2002,2003,2004,2005, + * 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -814,178 +815,6 @@ scm_i_print_array (SCM array, SCM port, scm_print_state *pstate) return scm_i_print_array_dimension (&h, 0, 0, port, pstate); } -/* Read an array. This function can also read vectors and uniform - vectors. Also, the conflict between '#f' and '#f32' and '#f64' is - handled here. - - C is the first character read after the '#'. -*/ - -static int -read_decimal_integer (SCM port, int c, ssize_t *resp) -{ - ssize_t sign = 1; - ssize_t res = 0; - int got_it = 0; - - if (c == '-') - { - sign = -1; - c = scm_getc (port); - } - - while ('0' <= c && c <= '9') - { - res = 10*res + c-'0'; - got_it = 1; - c = scm_getc (port); - } - - if (got_it) - *resp = sign * res; - return c; -} - -SCM -scm_i_read_array (SCM port, int c) -{ - ssize_t rank; - scm_t_wchar tag_buf[8]; - int tag_len; - - SCM tag, shape = SCM_BOOL_F, elements; - - /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but - the array code can not deal with zero-length dimensions yet, and - we want to allow zero-length vectors, of course. - */ - if (c == '(') - { - scm_ungetc (c, port); - return scm_vector (scm_read (port)); - } - - /* Disambiguate between '#f' and uniform floating point vectors. - */ - if (c == 'f') - { - c = scm_getc (port); - if (c != '3' && c != '6') - { - if (c != EOF) - scm_ungetc (c, port); - return SCM_BOOL_F; - } - rank = 1; - tag_buf[0] = 'f'; - tag_len = 1; - goto continue_reading_tag; - } - - /* Read rank. - */ - rank = 1; - c = read_decimal_integer (port, c, &rank); - if (rank < 0) - scm_i_input_error (NULL, port, "array rank must be non-negative", - SCM_EOL); - - /* Read tag. - */ - tag_len = 0; - continue_reading_tag: - while (c != EOF && c != '(' && c != '@' && c != ':' - && tag_len < sizeof tag_buf / sizeof tag_buf[0]) - { - tag_buf[tag_len++] = c; - c = scm_getc (port); - } - if (tag_len == 0) - tag = SCM_BOOL_T; - else - { - tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len)); - if (tag_len == sizeof tag_buf / sizeof tag_buf[0]) - scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a", - scm_list_1 (tag)); - } - - /* Read shape. - */ - if (c == '@' || c == ':') - { - shape = SCM_EOL; - - do - { - ssize_t lbnd = 0, len = 0; - SCM s; - - if (c == '@') - { - c = scm_getc (port); - c = read_decimal_integer (port, c, &lbnd); - } - - s = scm_from_ssize_t (lbnd); - - if (c == ':') - { - c = scm_getc (port); - c = read_decimal_integer (port, c, &len); - if (len < 0) - scm_i_input_error (NULL, port, - "array length must be non-negative", - SCM_EOL); - - s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1)); - } - - shape = scm_cons (s, shape); - } while (c == '@' || c == ':'); - - shape = scm_reverse_x (shape, SCM_EOL); - } - - /* Read nested lists of elements. - */ - if (c != '(') - scm_i_input_error (NULL, port, - "missing '(' in vector or array literal", - SCM_EOL); - scm_ungetc (c, port); - elements = scm_read (port); - - if (scm_is_false (shape)) - shape = scm_from_ssize_t (rank); - else if (scm_ilength (shape) != rank) - scm_i_input_error - (NULL, port, - "the number of shape specifications must match the array rank", - SCM_EOL); - - /* Handle special print syntax of rank zero arrays; see - scm_i_print_array for a rationale. - */ - if (rank == 0) - { - if (!scm_is_pair (elements)) - scm_i_input_error (NULL, port, - "too few elements in array literal, need 1", - SCM_EOL); - if (!scm_is_null (SCM_CDR (elements))) - scm_i_input_error (NULL, port, - "too many elements in array literal, want 1", - SCM_EOL); - elements = SCM_CAR (elements); - } - - /* Construct array. - */ - return scm_list_to_typed_array (tag, shape, elements); -} - - static SCM array_handle_ref (scm_t_array_handle *h, size_t pos) { diff --git a/libguile/arrays.h b/libguile/arrays.h index 5ea604d..6045ab6 100644 --- a/libguile/arrays.h +++ b/libguile/arrays.h @@ -3,7 +3,8 @@ #ifndef SCM_ARRAY_H #define SCM_ARRAY_H -/* Copyright (C) 1995,1996,1997,1999,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1999,2000,2001, 2004, 2006, 2008, 2009, + * 2010, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -73,7 +74,6 @@ typedef struct scm_i_t_array SCM_INTERNAL SCM scm_i_make_array (int ndim); SCM_INTERNAL int scm_i_print_array (SCM array, SCM port, scm_print_state *pstate); -SCM_INTERNAL SCM scm_i_read_array (SCM port, int c); SCM_INTERNAL void scm_init_arrays (void); diff --git a/libguile/ports.c b/libguile/ports.c index 301bc44..b16a463 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -533,7 +533,8 @@ scm_i_dynwind_current_load_port (SCM port) /* We need a global registry of ports to flush them all at exit, and to - get all the ports matching a file descriptor. + get all the ports matching a file descriptor. The associated values + are alists, currently used only for per-port reader options. */ SCM scm_i_port_weak_hash; @@ -633,7 +634,7 @@ scm_new_port_table_entry (scm_t_bits tag) SCM_SET_CELL_TYPE (z, tag); SCM_SETPTAB_ENTRY (z, entry); - scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F); + scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_EOL); /* For each new port, register a finalizer so that it port type's free function can be invoked eventually. */ diff --git a/libguile/read.c b/libguile/read.c index 87d73bf..b828f55 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -82,6 +82,145 @@ scm_t_option scm_read_opts[] = { }; /* + * Internal read options structure. This is initialized by 'scm_read' + * from the global and per-port read options, and a pointer is passed + * down to all helper functions. + */ +typedef struct { + enum { KEYWORD_STYLE_HASH_PREFIX, + KEYWORD_STYLE_PREFIX, + KEYWORD_STYLE_POSTFIX } keyword_style; + char copy_source_p; + char record_positions_p; + char case_insensitive_p; + char r6rs_escapes_p; + char square_brackets_p; + char hungry_eol_escapes_p; +} scm_t_read_opts; + +/* + * Per-port read option overrides. + * + * In order to implement the reader directives "#!fold-case" and + * "#!no-fold-case" properly, we need to set the 'case-insensitive' read + * option on a per-port basis. We also anticipate a need to set other + * read options on a per-port basis as well. + * + * We store per-port read option overrides in the + * '%read-option-overrides%' key of the port's alist, which is stored in + * 'scm_i_port_weak_hash'. The value stored in the alist is a single + * integer that contains a two-bit field for each read option. + * + * If a bit field contains OVERRIDE_DEFAULT (3), that indicates that the + * corresponding read option has not been overridden for this port, so + * the global read option should be used. Otherwise, the bit field + * contains the value of the read option. For boolean read options that + * have been overridden, the other possible values are 0 or 1. If the + * 'keyword_style' read option is overridden, its possible values are + * taken from the enum of the 'scm_t_read_opts' struct. + */ + +SCM_SYMBOL (sym_read_option_overrides, "%read-option-overrides%"); + +/* Offsets of bit fields for each per-port override */ +#define OVERRIDE_SHIFT_COPY_SOURCE_P 0 +#define OVERRIDE_SHIFT_RECORD_POSITIONS_P 2 +#define OVERRIDE_SHIFT_CASE_INSENSITIVE_P 4 +#define OVERRIDE_SHIFT_KEYWORD_STYLE 6 +#define OVERRIDE_SHIFT_R6RS_ESCAPES_P 8 +#define OVERRIDE_SHIFT_SQUARE_BRACKETS_P 10 +#define OVERRIDE_SHIFT_HUNGRY_EOL_ESCAPES_P 12 +#define OVERRIDES_SHIFT_END 14 + +#define OVERRIDES_ALL_DEFAULTS ((1UL << OVERRIDES_SHIFT_END) - 1) +#define OVERRIDES_MAX_VALUE OVERRIDES_ALL_DEFAULTS + +#define OVERRIDE_MASK 3 +#define OVERRIDE_DEFAULT 3 + +static void +set_per_port_read_option (SCM port, int shift, int value) +{ + SCM alist, scm_overrides; + int overrides; + + value &= OVERRIDE_MASK; + scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); + alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F); + scm_overrides = scm_assq_ref (alist, sym_read_option_overrides); + if (scm_is_unsigned_integer (scm_overrides, 0, OVERRIDES_MAX_VALUE)) + overrides = scm_to_int (scm_overrides); + else + overrides = OVERRIDES_ALL_DEFAULTS; + overrides &= ~(OVERRIDE_MASK << shift); + overrides |= value << shift; + scm_overrides = scm_from_int (overrides); + alist = scm_assq_set_x (alist, sym_read_option_overrides, scm_overrides); + scm_hashq_set_x (scm_i_port_weak_hash, port, alist); + scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); +} + +/* Set case-insensitivity on a per-port basis. */ +static void +set_per_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value) +{ + value = !!value; + opts->case_insensitive_p = value; + set_per_port_read_option (port, OVERRIDE_SHIFT_CASE_INSENSITIVE_P, value); +} + +/* Initialize the internal read options structure from the global and + per-port read options. */ +static void +init_read_options (SCM port, scm_t_read_opts *opts) +{ + SCM alist, val, scm_overrides; + int overrides; + int x; + + scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); + alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F); + scm_overrides = scm_assq_ref (alist, sym_read_option_overrides); + scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); + + if (scm_is_unsigned_integer (scm_overrides, 0, OVERRIDES_MAX_VALUE)) + overrides = scm_to_int (scm_overrides); + else + overrides = OVERRIDES_ALL_DEFAULTS; + + x = OVERRIDE_MASK & (overrides >> OVERRIDE_SHIFT_KEYWORD_STYLE); + if (x == OVERRIDE_DEFAULT) + { + val = SCM_PACK (SCM_KEYWORD_STYLE); + if (scm_is_eq (val, scm_keyword_prefix)) + x = KEYWORD_STYLE_PREFIX; + else if (scm_is_eq (val, scm_keyword_postfix)) + x = KEYWORD_STYLE_POSTFIX; + else + x = KEYWORD_STYLE_HASH_PREFIX; + } + opts->keyword_style = x; + +#define RESOLVE_BOOLEAN_OPTION(NAME, name) \ + do { \ + x = OVERRIDE_MASK & (overrides >> OVERRIDE_SHIFT_ ## NAME); \ + if (x == OVERRIDE_DEFAULT) \ + x = !!SCM_ ## NAME; \ + opts->name = x; \ + } while (0) + + RESOLVE_BOOLEAN_OPTION(COPY_SOURCE_P, copy_source_p); + RESOLVE_BOOLEAN_OPTION(RECORD_POSITIONS_P, record_positions_p); + RESOLVE_BOOLEAN_OPTION(CASE_INSENSITIVE_P, case_insensitive_p); + RESOLVE_BOOLEAN_OPTION(R6RS_ESCAPES_P, r6rs_escapes_p); + RESOLVE_BOOLEAN_OPTION(SQUARE_BRACKETS_P, square_brackets_p); + RESOLVE_BOOLEAN_OPTION(HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); + +#undef RESOLVE_BOOLEAN_OPTION +} + + +/* Give meaningful error messages for errors We use the format @@ -167,6 +306,9 @@ scm_i_read_hash_procedures_set_x (SCM value) /* The maximum size of Scheme character names. */ #define READER_CHAR_NAME_MAX_SIZE 50 +/* The maximum size of reader directive names. */ +#define READER_DIRECTIVE_NAME_MAX_SIZE 15 + /* `isblank' is only in C99. */ #define CHAR_IS_BLANK_(_chr) \ @@ -185,10 +327,11 @@ scm_i_read_hash_procedures_set_x (SCM value) structure''). */ #define CHAR_IS_R5RS_DELIMITER(c) \ (CHAR_IS_BLANK (c) \ - || (c == ')') || (c == '(') || (c == ';') || (c == '"') \ - || (SCM_SQUARE_BRACKETS_P && ((c == '[') || (c == ']')))) + || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"') -#define CHAR_IS_DELIMITER CHAR_IS_R5RS_DELIMITER +#define CHAR_IS_DELIMITER(c) \ + (CHAR_IS_R5RS_DELIMITER (c) \ + || (((c) == ']' || (c) == '[') && opts->square_brackets_p)) /* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical Structure''. */ @@ -199,8 +342,8 @@ scm_i_read_hash_procedures_set_x (SCM value) /* Read an SCSH block comment. */ static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM); static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM); -static SCM scm_read_commented_expression (scm_t_wchar, SCM); -static SCM scm_read_shebang (scm_t_wchar, SCM); +static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *); +static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *); static SCM scm_get_hash_procedure (int); /* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the @@ -208,7 +351,8 @@ static SCM scm_get_hash_procedure (int); fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of bytes actually read. */ static int -read_token (SCM port, char *buf, size_t buf_size, size_t *read) +read_token (SCM port, scm_t_read_opts *opts, + char *buf, size_t buf_size, size_t *read) { *read = 0; @@ -238,8 +382,8 @@ read_token (SCM port, char *buf, size_t buf_size, size_t *read) /* Like `read_token', but return either BUFFER, or a GC-allocated buffer if the token doesn't fit in BUFFER_SIZE bytes. */ static char * -read_complete_token (SCM port, char *buffer, size_t buffer_size, - size_t *read) +read_complete_token (SCM port, scm_t_read_opts *opts, + char *buffer, size_t buffer_size, size_t *read) { int overflow = 0; size_t bytes_read, overflow_size = 0; @@ -247,7 +391,7 @@ read_complete_token (SCM port, char *buffer, size_t buffer_size, do { - overflow = read_token (port, buffer, buffer_size, &bytes_read); + overflow = read_token (port, opts, buffer, buffer_size, &bytes_read); if (bytes_read == 0) break; if (overflow || overflow_size != 0) @@ -284,7 +428,7 @@ read_complete_token (SCM port, char *buffer, size_t buffer_size, /* Skip whitespace from PORT and return the first non-whitespace character read. Raise an error on end-of-file. */ static int -flush_ws (SCM port, const char *eoferr) +flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr) { scm_t_wchar c; while (1) @@ -321,10 +465,10 @@ flush_ws (SCM port, const char *eoferr) eoferr = "read_sharp"; goto goteof; case '!': - scm_read_shebang (c, port); + scm_read_shebang (c, port, opts); break; case ';': - scm_read_commented_expression (c, port); + scm_read_commented_expression (c, port, opts); break; case '|': if (scm_is_false (scm_get_hash_procedure (c))) @@ -355,20 +499,22 @@ flush_ws (SCM port, const char *eoferr) /* Token readers. */ -static SCM scm_read_expression (SCM port); -static SCM scm_read_sharp (int chr, SCM port, long line, int column); +static SCM scm_read_expression (SCM port, scm_t_read_opts *opts); +static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts, + long line, int column); static SCM -maybe_annotate_source (SCM x, SCM port, long line, int column) +maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts, + long line, int column) { - if (SCM_RECORD_POSITIONS_P) + if (opts->record_positions_p) scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port)); return x; } static SCM -scm_read_sexp (scm_t_wchar chr, SCM port) +scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_i_lreadparen" { int c; @@ -379,20 +525,20 @@ scm_read_sexp (scm_t_wchar chr, SCM port) long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - c = flush_ws (port, FUNC_NAME); + c = flush_ws (port, opts, FUNC_NAME); if (terminating_char == c) return SCM_EOL; scm_ungetc (c, port); - tmp = scm_read_expression (port); + tmp = scm_read_expression (port, opts); /* Note that it is possible for scm_read_expression to return scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So check that it's a real dot by checking `c'. */ if (c == '.' && scm_is_eq (scm_sym_dot, tmp)) { - ans = scm_read_expression (port); - if (terminating_char != (c = flush_ws (port, FUNC_NAME))) + ans = scm_read_expression (port, opts); + if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME))) scm_i_input_error (FUNC_NAME, port, "missing close paren", SCM_EOL); return ans; @@ -401,24 +547,24 @@ scm_read_sexp (scm_t_wchar chr, SCM port) /* Build the head of the list structure. */ ans = tl = scm_cons (tmp, SCM_EOL); - while (terminating_char != (c = flush_ws (port, FUNC_NAME))) + while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME))) { SCM new_tail; - if (c == ')' || (SCM_SQUARE_BRACKETS_P && c == ']')) + if (c == ')' || (opts->square_brackets_p && c == ']')) scm_i_input_error (FUNC_NAME, port, "in pair: mismatched close paren: ~A", scm_list_1 (SCM_MAKE_CHAR (c))); scm_ungetc (c, port); - tmp = scm_read_expression (port); + tmp = scm_read_expression (port, opts); /* See above note about scm_sym_dot. */ if (c == '.' && scm_is_eq (scm_sym_dot, tmp)) { - SCM_SETCDR (tl, scm_read_expression (port)); + SCM_SETCDR (tl, scm_read_expression (port, opts)); - c = flush_ws (port, FUNC_NAME); + c = flush_ws (port, opts, FUNC_NAME); if (terminating_char != c) scm_i_input_error (FUNC_NAME, port, "in pair: missing close paren", SCM_EOL); @@ -431,7 +577,7 @@ scm_read_sexp (scm_t_wchar chr, SCM port) } exit: - return maybe_annotate_source (ans, port, line, column); + return maybe_annotate_source (ans, port, opts, line, column); } #undef FUNC_NAME @@ -487,7 +633,7 @@ skip_intraline_whitespace (SCM port) } static SCM -scm_read_string (int chr, SCM port) +scm_read_string (int chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { /* For strings smaller than C_STR, this function creates only one Scheme @@ -526,7 +672,7 @@ scm_read_string (int chr, SCM port) case '\\': break; case '\n': - if (SCM_HUNGRY_EOL_ESCAPES_P) + if (opts->hungry_eol_escapes_p) skip_intraline_whitespace (port); continue; case '0': @@ -554,19 +700,19 @@ scm_read_string (int chr, SCM port) c = '\010'; break; case 'x': - if (SCM_R6RS_ESCAPES_P) + if (opts->r6rs_escapes_p) SCM_READ_HEX_ESCAPE (10, ';'); else SCM_READ_HEX_ESCAPE (2, '\0'); break; case 'u': - if (!SCM_R6RS_ESCAPES_P) + if (!opts->r6rs_escapes_p) { SCM_READ_HEX_ESCAPE (4, '\0'); break; } case 'U': - if (!SCM_R6RS_ESCAPES_P) + if (!opts->r6rs_escapes_p) { SCM_READ_HEX_ESCAPE (6, '\0'); break; @@ -593,13 +739,13 @@ scm_read_string (int chr, SCM port) str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED); } - return maybe_annotate_source (str, port, line, column); + return maybe_annotate_source (str, port, opts, line, column); } #undef FUNC_NAME static SCM -scm_read_number (scm_t_wchar chr, SCM port) +scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { SCM result, str = SCM_EOL; char local_buffer[READER_BUFFER_SIZE], *buffer; @@ -611,7 +757,7 @@ scm_read_number (scm_t_wchar chr, SCM port) int column = SCM_COL (port) - 1; scm_ungetc (chr, port); - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &bytes_read); str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); @@ -620,30 +766,30 @@ scm_read_number (scm_t_wchar chr, SCM port) if (scm_is_false (result)) { /* Return a symbol instead of a number */ - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_string_to_symbol (str); } else if (SCM_NIMP (result)) - result = maybe_annotate_source (result, port, line, column); + result = maybe_annotate_source (result, port, opts, line, column); SCM_COL (port) += scm_i_string_length (str); return result; } static SCM -scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) +scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { SCM result; int ends_with_colon = 0; size_t bytes_read; - int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix); + int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX); char local_buffer[READER_BUFFER_SIZE], *buffer; scm_t_port *pt = SCM_PTAB_ENTRY (port); SCM str; scm_ungetc (chr, port); - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &bytes_read); if (bytes_read > 0) ends_with_colon = buffer[bytes_read - 1] == ':'; @@ -653,7 +799,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) str = scm_from_stringn (buffer, bytes_read - 1, pt->encoding, pt->ilseq_handler); - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_symbol_to_keyword (scm_string_to_symbol (str)); } @@ -662,7 +808,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); - if (SCM_CASE_INSENSITIVE_P) + if (opts->case_insensitive_p) str = scm_string_downcase_x (str); result = scm_string_to_symbol (str); } @@ -672,7 +818,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) } static SCM -scm_read_number_and_radix (scm_t_wchar chr, SCM port) +scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { SCM result; @@ -710,7 +856,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) radix = 10; } - buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &read); pt = SCM_PTAB_ENTRY (port); @@ -730,7 +876,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) #undef FUNC_NAME static SCM -scm_read_quote (int chr, SCM port) +scm_read_quote (int chr, SCM port, scm_t_read_opts *opts) { SCM p; long line = SCM_LINUM (port); @@ -767,8 +913,8 @@ scm_read_quote (int chr, SCM port) abort (); } - p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - return maybe_annotate_source (p, port, line, column); + p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL); + return maybe_annotate_source (p, port, opts, line, column); } SCM_SYMBOL (sym_syntax, "syntax"); @@ -777,7 +923,7 @@ SCM_SYMBOL (sym_unsyntax, "unsyntax"); SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing"); static SCM -scm_read_syntax (int chr, SCM port) +scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts) { SCM p; long line = SCM_LINUM (port); @@ -814,14 +960,14 @@ scm_read_syntax (int chr, SCM port) abort (); } - p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - return maybe_annotate_source (p, port, line, column); + p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL); + return maybe_annotate_source (p, port, opts, line, column); } static SCM -scm_read_nil (int chr, SCM port) +scm_read_nil (int chr, SCM port, scm_t_read_opts *opts) { - SCM id = scm_read_mixed_case_symbol (chr, port); + SCM id = scm_read_mixed_case_symbol (chr, port, opts); if (!scm_is_eq (id, sym_nil)) scm_i_input_error ("scm_read_nil", port, @@ -867,7 +1013,7 @@ scm_read_boolean (int chr, SCM port) } static SCM -scm_read_character (scm_t_wchar chr, SCM port) +scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { char buffer[READER_CHAR_NAME_MAX_SIZE]; @@ -877,7 +1023,8 @@ scm_read_character (scm_t_wchar chr, SCM port) int overflow; scm_t_port *pt; - overflow = read_token (port, buffer, READER_CHAR_NAME_MAX_SIZE, &bytes_read); + overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE, + &bytes_read); if (overflow) scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL); @@ -973,7 +1120,7 @@ scm_read_character (scm_t_wchar chr, SCM port) #undef FUNC_NAME static SCM -scm_read_keyword (int chr, SCM port) +scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts) { SCM symbol; @@ -982,7 +1129,7 @@ scm_read_keyword (int chr, SCM port) to adapt to the delimiters currently valid of symbols. XXX: This implementation allows sloppy syntaxes like `#: key'. */ - symbol = scm_read_expression (port); + symbol = scm_read_expression (port, opts); if (!scm_is_symbol (symbol)) scm_i_input_error ("scm_read_keyword", port, "keyword prefix `~a' not followed by a symbol: ~s", @@ -992,34 +1139,195 @@ scm_read_keyword (int chr, SCM port) } static SCM -scm_read_vector (int chr, SCM port, long line, int column) +scm_read_vector (int chr, SCM port, scm_t_read_opts *opts, + long line, int column) { /* Note: We call `scm_read_sexp ()' rather than READER here in order to guarantee that it's going to do what we want. After all, this is an implementation detail of `scm_read_vector ()', not a desirable property. */ - return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)), - port, line, column); + return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)), + port, opts, line, column); +} + +/* Helper used by scm_read_array */ +static int +read_decimal_integer (SCM port, int c, ssize_t *resp) +{ + ssize_t sign = 1; + ssize_t res = 0; + int got_it = 0; + + if (c == '-') + { + sign = -1; + c = scm_getc (port); + } + + while ('0' <= c && c <= '9') + { + res = 10*res + c-'0'; + got_it = 1; + c = scm_getc (port); + } + + if (got_it) + *resp = sign * res; + return c; } +/* Read an array. This function can also read vectors and uniform + vectors. Also, the conflict between '#f' and '#f32' and '#f64' is + handled here. + + C is the first character read after the '#'. +*/ static SCM -scm_read_array (int chr, SCM port, long line, int column) +scm_read_array (int c, SCM port, scm_t_read_opts *opts, + long line, int column) { - SCM result = scm_i_read_array (port, chr); - if (scm_is_false (result)) - return result; + ssize_t rank; + scm_t_wchar tag_buf[8]; + int tag_len; + + SCM tag, shape = SCM_BOOL_F, elements, array; + + /* XXX - shortcut for ordinary vectors. Shouldn't be necessary but + the array code can not deal with zero-length dimensions yet, and + we want to allow zero-length vectors, of course. + */ + if (c == '(') + return scm_read_vector (c, port, opts, line, column); + + /* Disambiguate between '#f' and uniform floating point vectors. + */ + if (c == 'f') + { + c = scm_getc (port); + if (c != '3' && c != '6') + { + if (c != EOF) + scm_ungetc (c, port); + return SCM_BOOL_F; + } + rank = 1; + tag_buf[0] = 'f'; + tag_len = 1; + goto continue_reading_tag; + } + + /* Read rank. + */ + rank = 1; + c = read_decimal_integer (port, c, &rank); + if (rank < 0) + scm_i_input_error (NULL, port, "array rank must be non-negative", + SCM_EOL); + + /* Read tag. + */ + tag_len = 0; + continue_reading_tag: + while (c != EOF && c != '(' && c != '@' && c != ':' + && tag_len < sizeof tag_buf / sizeof tag_buf[0]) + { + tag_buf[tag_len++] = c; + c = scm_getc (port); + } + if (tag_len == 0) + tag = SCM_BOOL_T; else - return maybe_annotate_source (result, port, line, column); + { + tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len)); + if (tag_len == sizeof tag_buf / sizeof tag_buf[0]) + scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a", + scm_list_1 (tag)); + } + + /* Read shape. + */ + if (c == '@' || c == ':') + { + shape = SCM_EOL; + + do + { + ssize_t lbnd = 0, len = 0; + SCM s; + + if (c == '@') + { + c = scm_getc (port); + c = read_decimal_integer (port, c, &lbnd); + } + + s = scm_from_ssize_t (lbnd); + + if (c == ':') + { + c = scm_getc (port); + c = read_decimal_integer (port, c, &len); + if (len < 0) + scm_i_input_error (NULL, port, + "array length must be non-negative", + SCM_EOL); + + s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1)); + } + + shape = scm_cons (s, shape); + } while (c == '@' || c == ':'); + + shape = scm_reverse_x (shape, SCM_EOL); + } + + /* Read nested lists of elements. + */ + if (c != '(') + scm_i_input_error (NULL, port, + "missing '(' in vector or array literal", + SCM_EOL); + elements = scm_read_sexp (c, port, opts); + + if (scm_is_false (shape)) + shape = scm_from_ssize_t (rank); + else if (scm_ilength (shape) != rank) + scm_i_input_error + (NULL, port, + "the number of shape specifications must match the array rank", + SCM_EOL); + + /* Handle special print syntax of rank zero arrays; see + scm_i_print_array for a rationale. + */ + if (rank == 0) + { + if (!scm_is_pair (elements)) + scm_i_input_error (NULL, port, + "too few elements in array literal, need 1", + SCM_EOL); + if (!scm_is_null (SCM_CDR (elements))) + scm_i_input_error (NULL, port, + "too many elements in array literal, want 1", + SCM_EOL); + elements = SCM_CAR (elements); + } + + /* Construct array */ + array = scm_list_to_typed_array (tag, shape, elements); + return maybe_annotate_source (array, port, opts, line, column); } static SCM -scm_read_srfi4_vector (int chr, SCM port, long line, int column) +scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts, + long line, int column) { - return scm_read_array (chr, port, line, column); + return scm_read_array (chr, port, opts, line, column); } static SCM -scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) +scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) { chr = scm_getc (port); if (chr != 'u') @@ -1034,8 +1342,8 @@ scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) goto syntax; return maybe_annotate_source - (scm_u8_list_to_bytevector (scm_read_sexp (chr, port)), - port, line, column); + (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)), + port, opts, line, column); syntax: scm_i_input_error ("read_bytevector", port, @@ -1045,7 +1353,8 @@ scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) } static SCM -scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column) +scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) { /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is terribly inefficient but who cares? */ @@ -1063,7 +1372,7 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column) return maybe_annotate_source (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)), - port, line, column); + port, opts, line, column); } static SCM @@ -1091,37 +1400,40 @@ scm_read_scsh_block_comment (scm_t_wchar chr, SCM port) } static SCM -scm_read_shebang (scm_t_wchar chr, SCM port) +scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) { - int c = 0; - if ((c = scm_get_byte_or_eof (port)) != 'r') - { - scm_ungetc (c, port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof (port)) != '6') - { - scm_ungetc (c, port); - scm_ungetc ('r', port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof (port)) != 'r') - { - scm_ungetc (c, port); - scm_ungetc ('6', port); - scm_ungetc ('r', port); - return scm_read_scsh_block_comment (chr, port); - } - if ((c = scm_get_byte_or_eof (port)) != 's') + char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1]; + int c; + int i = 0; + + /* FIXME: Maybe handle shebang at the beginning of a file differently? */ + while (i <= READER_DIRECTIVE_NAME_MAX_SIZE) { - scm_ungetc (c, port); - scm_ungetc ('r', port); - scm_ungetc ('6', port); - scm_ungetc ('r', port); - return scm_read_scsh_block_comment (chr, port); + c = scm_getc (port); + if (c == EOF) + scm_i_input_error ("skip_block_comment", port, + "unterminated `#! ... !#' comment", SCM_EOL); + else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-') + name[i++] = c; + else if (CHAR_IS_DELIMITER (c)) + { + scm_ungetc (c, port); + name[i] = '\0'; + if (0 == strcmp ("r6rs", name)) + ; /* Silently ignore */ + else if (0 == strcmp ("fold-case", name)) + set_per_port_case_insensitive_p (port, opts, 1); + else if (0 == strcmp ("no-fold-case", name)) + set_per_port_case_insensitive_p (port, opts, 0); + else + break; + + return SCM_UNSPECIFIED; + } } - - return SCM_UNSPECIFIED; + while (i > 0) + scm_ungetc (name[--i], port); + return scm_read_scsh_block_comment (chr, port); } static SCM @@ -1163,16 +1475,17 @@ scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port) } static SCM -scm_read_commented_expression (scm_t_wchar chr, SCM port) +scm_read_commented_expression (scm_t_wchar chr, SCM port, + scm_t_read_opts *opts) { scm_t_wchar c; - c = flush_ws (port, (char *) NULL); + c = flush_ws (port, opts, (char *) NULL); if (EOF == c) scm_i_input_error ("read_commented_expression", port, "no expression after #; comment", SCM_EOL); scm_ungetc (c, port); - scm_read_expression (port); + scm_read_expression (port, opts); return SCM_UNSPECIFIED; } @@ -1274,7 +1587,7 @@ scm_read_extended_symbol (scm_t_wchar chr, SCM port) /* Top-level token readers, i.e., dispatchers. */ static SCM -scm_read_sharp_extension (int chr, SCM port) +scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts) { SCM proc; @@ -1287,7 +1600,8 @@ scm_read_sharp_extension (int chr, SCM port) got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port); - if (scm_is_pair (got) && !scm_i_has_source_properties (got)) + if (opts->record_positions_p && SCM_NIMP (got) + && !scm_i_has_source_properties (got)) scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port)); return got; @@ -1299,39 +1613,40 @@ scm_read_sharp_extension (int chr, SCM port) /* The reader for the sharp `#' character. It basically dispatches reads among the above token readers. */ static SCM -scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) +scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts, + long line, int column) #define FUNC_NAME "scm_lreadr" { SCM result; chr = scm_getc (port); - result = scm_read_sharp_extension (chr, port); + result = scm_read_sharp_extension (chr, port, opts); if (!scm_is_eq (result, SCM_UNSPECIFIED)) return result; switch (chr) { case '\\': - return (scm_read_character (chr, port)); + return (scm_read_character (chr, port, opts)); case '(': - return (scm_read_vector (chr, port, line, column)); + return (scm_read_vector (chr, port, opts, line, column)); case 's': case 'u': case 'f': case 'c': /* This one may return either a boolean or an SRFI-4 vector. */ - return (scm_read_srfi4_vector (chr, port, line, column)); + return (scm_read_srfi4_vector (chr, port, opts, line, column)); case 'v': - return (scm_read_bytevector (chr, port, line, column)); + return (scm_read_bytevector (chr, port, opts, line, column)); case '*': - return (scm_read_guile_bit_vector (chr, port, line, column)); + return (scm_read_guile_bit_vector (chr, port, opts, line, column)); case 't': case 'T': case 'F': return (scm_read_boolean (chr, port)); case ':': - return (scm_read_keyword (chr, port)); + return (scm_read_keyword (chr, port, opts)); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '@': @@ -1342,7 +1657,7 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) case 'h': case 'l': #endif - return (scm_read_array (chr, port, line, column)); + return (scm_read_array (chr, port, opts, line, column)); case 'i': case 'e': @@ -1354,7 +1669,7 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) if (next_c != EOF) scm_ungetc (next_c, port); if (next_c == '(') - return scm_read_array (chr, port, line, column); + return scm_read_array (chr, port, opts, line, column); /* Fall through. */ } #endif @@ -1368,21 +1683,21 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) case 'X': case 'I': case 'E': - return (scm_read_number_and_radix (chr, port)); + return (scm_read_number_and_radix (chr, port, opts)); case '{': return (scm_read_extended_symbol (chr, port)); case '!': - return (scm_read_shebang (chr, port)); + return (scm_read_shebang (chr, port, opts)); case ';': - return (scm_read_commented_expression (chr, port)); + return (scm_read_commented_expression (chr, port, opts)); case '`': case '\'': case ',': - return (scm_read_syntax (chr, port)); + return (scm_read_syntax (chr, port, opts)); case 'n': - return (scm_read_nil (chr, port)); + return (scm_read_nil (chr, port, opts)); default: - result = scm_read_sharp_extension (chr, port); + result = scm_read_sharp_extension (chr, port, opts); if (scm_is_eq (result, SCM_UNSPECIFIED)) { /* To remain compatible with 1.8 and earlier, the following @@ -1406,7 +1721,7 @@ scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) #undef FUNC_NAME static SCM -scm_read_expression (SCM port) +scm_read_expression (SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_read_expression" { while (1) @@ -1424,22 +1739,22 @@ scm_read_expression (SCM port) (void) scm_read_semicolon_comment (chr, port); break; case '[': - if (!SCM_SQUARE_BRACKETS_P) - return (scm_read_mixed_case_symbol (chr, port)); + if (!opts->square_brackets_p) + return (scm_read_mixed_case_symbol (chr, port, opts)); /* otherwise fall through */ case '(': - return (scm_read_sexp (chr, port)); + return (scm_read_sexp (chr, port, opts)); case '"': - return (scm_read_string (chr, port)); + return (scm_read_string (chr, port, opts)); case '\'': case '`': case ',': - return (scm_read_quote (chr, port)); + return (scm_read_quote (chr, port, opts)); case '#': { long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - SCM result = scm_read_sharp (chr, port, line, column); + SCM result = scm_read_sharp (chr, port, opts, line, column); if (scm_is_eq (result, SCM_UNSPECIFIED)) /* We read a comment or some such. */ break; @@ -1450,23 +1765,23 @@ scm_read_expression (SCM port) scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL); break; case ']': - if (SCM_SQUARE_BRACKETS_P) + if (opts->square_brackets_p) scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL); /* otherwise fall through */ case EOF: return SCM_EOF_VAL; case ':': - if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix)) - return scm_symbol_to_keyword (scm_read_expression (port)); + if (opts->keyword_style == KEYWORD_STYLE_PREFIX) + return scm_symbol_to_keyword (scm_read_expression (port, opts)); /* Fall through. */ default: { if (((chr >= '0') && (chr <= '9')) || (strchr ("+-.", chr))) - return (scm_read_number (chr, port)); + return (scm_read_number (chr, port, opts)); else - return (scm_read_mixed_case_symbol (chr, port)); + return (scm_read_mixed_case_symbol (chr, port, opts)); } } } @@ -1483,18 +1798,21 @@ SCM_DEFINE (scm_read, "read", 0, 1, 0, "Any whitespace before the next token is discarded.") #define FUNC_NAME s_scm_read { + scm_t_read_opts opts; int c; if (SCM_UNBNDP (port)) port = scm_current_input_port (); SCM_VALIDATE_OPINPORT (1, port); - c = flush_ws (port, (char *) NULL); + init_read_options (port, &opts); + + c = flush_ws (port, &opts, (char *) NULL); if (EOF == c) return SCM_EOF_VAL; scm_ungetc (c, port); - return (scm_read_expression (port)); + return (scm_read_expression (port, &opts)); } #undef FUNC_NAME diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 60c853c..6e02255 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -401,6 +401,19 @@ (lambda () (read-disable 'hungry-eol-escapes)))))) +(with-test-prefix "per-port-read-options" + (pass-if "case-sensitive" + (equal? '(guile GuiLe gUIle) + (with-read-options '(case-insensitive) + (lambda () + (with-input-from-string "GUIle #!no-fold-case GuiLe gUIle" + (lambda () + (list (read) (read) (read)))))))) + (pass-if "case-insensitive" + (equal? '(GUIle guile guile) + (with-input-from-string "GUIle #!fold-case GuiLe gUIle" + (lambda () + (list (read) (read) (read))))))) (with-test-prefix "#;" (for-each -- 1.7.10.4