diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm index 5a0e6b3ff..1066ed0c2 100644 --- a/module/language/elisp/lexer.scm +++ b/module/language/elisp/lexer.scm @@ -186,9 +186,22 @@ ;;; against regular expressions to determine if it is possibly an ;;; integer or a float. -(define integer-regex (make-regexp "^[+-]?[0-9]+\\.?$")) - -(define float-regex +(define make-integer-regexp + (let ((alphabet "0123456789abcdefghijklmnopqrstuvwxyz")) + (lambda (radix) + (unless (<= 2 radix 36) + (error "invalid radix" radix)) + (let ((pat (string-append "^[+-]?[" (string-take alphabet radix) "]+$"))) + (make-regexp pat regexp/icase))))) + +(define get-integer-regexp + (let ((ht (make-hash-table))) + (hash-set! ht 10 (make-regexp "^[+-]?[0-9]+\\.?$")) + (lambda (radix) + (or (hash-ref ht radix) + (hash-set! ht radix (make-integer-regexp radix)))))) + +(define float-regexp (make-regexp "^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$")) @@ -197,7 +210,9 @@ (define no-escape-punctuation (string->char-set "-+=*/_~!@$%^&:<>{}?.")) -(define (get-symbol-or-number port) +(define (get-symbol-or-number port radix) + (define integer-regexp + (and=> radix get-integer-regexp)) (let iterate ((result-chars '()) (had-escape #f)) (let* ((c (read-char port)) @@ -207,10 +222,11 @@ (values (cond ((and (not had-escape) - (regexp-exec integer-regex result)) + (regexp? integer-regexp) + (regexp-exec integer-regexp result)) 'integer) ((and (not had-escape) - (regexp-exec float-regex result)) + (regexp-exec float-regexp result)) 'float) (else 'symbol)) result)))) @@ -228,11 +244,20 @@ (unread-char c port) (finish)))))) +(define (string->integer str radix) + (let* ((num (string->number str radix)) + (exact (and=> num inexact->exact))) + (unless (number? num) + (error "expected number" str)) + (unless (integer? exact) + (error "expected integer" str)) + exact)) + ;;; Parse a circular structure marker without the leading # (which was ;;; already read and recognized), that is, a number as identifier and ;;; then either = or #. -(define (get-circular-marker port) +(define (get-circular-marker-or-number port) (call-with-values (lambda () (let iterate ((result 0)) @@ -241,13 +266,20 @@ (let ((val (- (char->integer cur) (char->integer #\0)))) (iterate (+ (* result 10) val))) (values result cur))))) - (lambda (id type) - (case type - ((#\#) `(circular-ref . ,id)) - ((#\=) `(circular-def . ,id)) - (else (lexer-error port - "invalid circular marker character" - type)))))) + (lambda (result last-ch) + (case last-ch + ((#\#) `(circular-ref . ,result)) + ((#\=) `(circular-def . ,result)) + ((#\r) + (call-with-values + (lambda () + (get-symbol-or-number port result)) + (lambda (type str) + (unless (eq? type 'integer) + (error "invalid integer read syntax" type str result)) + `(integer . ,(string->integer str result))))) + (else + (lexer-error port "invalid circular marker character" last-ch)))))) ;;; Main lexer routine, which is given a port and does look for the next ;;; token. @@ -334,13 +366,21 @@ (case c ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (unread-char c port) - (let ((mark (get-circular-marker port))) + (let ((mark (get-circular-marker-or-number port))) (return (car mark) (cdr mark)))) ((#\') (return 'function #f)) + ((#\b #\o #\x) + (let ((radix (case c ((#\b) 2) ((#\o) 8) ((#\x) 16)))) + (call-with-values + (lambda () (get-symbol-or-number port radix)) + (lambda (type str) + (unless (eq? type 'integer) + (error "invalid integer read syntax" type str radix)) + (return 'integer (string->integer str radix)))))) ((#\:) (call-with-values - (lambda () (get-symbol-or-number port)) + (lambda () (get-symbol-or-number port #f)) (lambda (type str) (return 'symbol (make-symbol str)))))))) ;; Parentheses and other special-meaning single characters. @@ -363,7 +403,7 @@ (else (unread-char c port) (call-with-values - (lambda () (get-symbol-or-number port)) + (lambda () (get-symbol-or-number port 10)) (lambda (type str) (case type ((symbol) @@ -384,12 +424,7 @@ ;; string->number returns an inexact real. Thus we need ;; a conversion here, but it should always result in an ;; integer! - (return - 'integer - (let ((num (inexact->exact (string->number str)))) - (if (not (integer? num)) - (error "expected integer" str num)) - num))) + (return 'integer (string->integer str 10))) ((float) (return 'float (let ((num (string->number str))) (if (exact? num) diff --git a/test-suite/tests/elisp-reader.test b/test-suite/tests/elisp-reader.test index 669c4d592..b84e4756f 100644 --- a/test-suite/tests/elisp-reader.test +++ b/test-suite/tests/elisp-reader.test @@ -75,21 +75,45 @@ (symbol . abc) (paren-open . #f) (symbol . def) (paren-close . #f) (symbol . ghi) (symbol . .e5)))) - ; Here we make use of the property that exact/inexact numbers are not equal? - ; even when they have the same numeric value! - (pass-if "integers (decimal)" - (equal? (lex-string "-1 1 1. +1 01234") - '((integer . -1) (integer . 1) (integer . 1) (integer . 1) - (integer . 1234)))) - (pass-if "integers (binary)" - (equal? (lex-string "#b-1 #b1 #b+1 #b10011010010") - '((integer . -1) (integer . 1) (integer . 1) (integer . 1234)))) - (pass-if "integers (octal)" - (equal? (lex-string "#o-1 #o1 #o+1 #o2322") - '((integer . -1) (integer . 1) (integer . 1) (integer . 1234)))) - (pass-if "integers (hexadecimal)" - (equal? (lex-string "#x-1 #x1 #x+1 #x4d2") - '((integer . -1) (integer . 1) (integer . 1) (integer . 1234)))) + (with-test-prefix "integers" + ;; Here we make use of the property that exact/inexact numbers are not equal? + ;; even when they have the same numeric value! + + (with-test-prefix "decimal" + (pass-if-equal "simple" '((integer . 1)) (lex-string "1")) + (pass-if-equal "positive" '((integer . 1)) (lex-string "+1")) + (pass-if-equal "negative" '((integer . -1)) (lex-string "-1")) + (pass-if-equal "trailing dot" '((integer . 1)) (lex-string "1.")) + (pass-if-equal "all digits" + '((integer . 123456789)) (lex-string "0123456789"))) + + (with-test-prefix "binary" + (pass-if-equal "simple" '((integer . 1)) (lex-string "#b1")) + (pass-if-equal "positive" '((integer . 1)) (lex-string "#b+1")) + (pass-if-equal "negative" '((integer . -1)) (lex-string "#b-1")) + (pass-if-equal "all digits" + '((integer . #b010101)) (lex-string "#b010101"))) + + (with-test-prefix "octal" + (pass-if-equal "simple" '((integer . 1)) (lex-string "#o1")) + (pass-if-equal "positive" '((integer . 1)) (lex-string "#o+1")) + (pass-if-equal "negative" '((integer . -1)) (lex-string "#o-1")) + (pass-if-equal "all digits" + '((integer . #o01234567)) (lex-string "#o01234567"))) + + (with-test-prefix "hexadecimal" + (pass-if-equal "simple" '((integer . 1)) (lex-string "#x1")) + (pass-if-equal "positive" '((integer . 1)) (lex-string "#x+1")) + (pass-if-equal "negative" '((integer . -1)) (lex-string "#x-1")) + (pass-if-equal "all digits" + '((integer . #x0123456789abcdef)) (lex-string "#x0123456789aBcDeF"))) + + (with-test-prefix "arbitrary radix" + (pass-if-equal "simple" '((integer . 44)) (lex-string "#24r1k")) + (pass-if-equal "positive" '((integer . 44)) (lex-string "#24r+1k")) + (pass-if-equal "negative" '((integer . -44)) (lex-string "#24r-1k")) + (pass-if-equal "max radix" '((integer . 35)) (lex-string "#36rz")))) + (pass-if "floats" (equal? (lex-string "1500.0 15e2 15.e2 1.5e3 .15e4 -.345e-2") '((float . 1500.0) (float . 1500.0) (float . 1500.0)