unofficial mirror of bug-guile@gnu.org 
 help / color / mirror / Atom feed
From: Ivan Sokolov <ivan-p-sokolov@ya.ru>
To: 48758@debbugs.gnu.org
Subject: bug#48758: [PATCH] Elisp reader does not support non-decimal integers
Date: Mon, 31 May 2021 15:04:42 +0300	[thread overview]
Message-ID: <87lf7v6rdx.fsf@ya.ru> (raw)

[-- Attachment #1: Type: text/plain, Size: 449 bytes --]


This patch improves Elisp compatibility.

I added support for binary, octal, hexadecimal and arbitrary radix
integer literals as described in Elisp manual [1], except for the final
period. It should not take long to add support for the final period, but
in this patch I did not do this because Emacs itself does not support
final period for non-decimal integers.

[1]: https://www.gnu.org/software/emacs/manual/html_node/elisp/Integer-Basics.html


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: elisp-numbers.diff --]
[-- Type: text/x-patch, Size: 9148 bytes --]

diff --git a/module/language/elisp/lexer.scm b/module/language/elisp/lexer.scm
index 5a0e6b3ff..1066ed0c2 100644
--- a/module/language/elisp/lexer.scm
+++ b/module/language/elisp/lexer.scm
@@ -186,9 +186,22 @@
 ;;; against regular expressions to determine if it is possibly an
 ;;; integer or a float.
 
-(define integer-regex (make-regexp "^[+-]?[0-9]+\\.?$"))
-
-(define float-regex
+(define make-integer-regexp
+  (let ((alphabet "0123456789abcdefghijklmnopqrstuvwxyz"))
+    (lambda (radix)
+      (unless (<= 2 radix 36)
+        (error "invalid radix" radix))
+      (let ((pat (string-append "^[+-]?[" (string-take alphabet radix) "]+$")))
+        (make-regexp pat regexp/icase)))))
+
+(define get-integer-regexp
+  (let ((ht (make-hash-table)))
+    (hash-set! ht 10 (make-regexp "^[+-]?[0-9]+\\.?$"))
+    (lambda (radix)
+      (or (hash-ref ht radix)
+          (hash-set! ht radix (make-integer-regexp radix))))))
+
+(define float-regexp
   (make-regexp
    "^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$"))
 
@@ -197,7 +210,9 @@
 
 (define no-escape-punctuation (string->char-set "-+=*/_~!@$%^&:<>{}?."))
 
-(define (get-symbol-or-number port)
+(define (get-symbol-or-number port radix)
+  (define integer-regexp
+    (and=> radix get-integer-regexp))
   (let iterate ((result-chars '())
                 (had-escape #f))
     (let* ((c (read-char port))
@@ -207,10 +222,11 @@
                        (values
                         (cond
                          ((and (not had-escape)
-                               (regexp-exec integer-regex result))
+                               (regexp? integer-regexp)
+                               (regexp-exec integer-regexp result))
                           'integer)
                          ((and (not had-escape)
-                               (regexp-exec float-regex result))
+                               (regexp-exec float-regexp result))
                           'float)
                          (else 'symbol))
                         result))))
@@ -228,11 +244,20 @@
         (unread-char c port)
         (finish))))))
 
+(define (string->integer str radix)
+  (let* ((num (string->number str radix))
+         (exact (and=> num inexact->exact)))
+    (unless (number? num)
+      (error "expected number" str))
+    (unless (integer? exact)
+      (error "expected integer" str))
+    exact))
+
 ;;; Parse a circular structure marker without the leading # (which was
 ;;; already read and recognized), that is, a number as identifier and
 ;;; then either = or #.
 
-(define (get-circular-marker port)
+(define (get-circular-marker-or-number port)
   (call-with-values
       (lambda ()
         (let iterate ((result 0))
@@ -241,13 +266,20 @@
                 (let ((val (- (char->integer cur) (char->integer #\0))))
                   (iterate (+ (* result 10) val)))
                 (values result cur)))))
-    (lambda (id type)
-      (case type
-        ((#\#) `(circular-ref . ,id))
-        ((#\=) `(circular-def . ,id))
-        (else (lexer-error port
-                           "invalid circular marker character"
-                           type))))))
+    (lambda (result last-ch)
+      (case last-ch
+        ((#\#) `(circular-ref . ,result))
+        ((#\=) `(circular-def . ,result))
+        ((#\r)
+         (call-with-values
+             (lambda ()
+               (get-symbol-or-number port result))
+           (lambda (type str)
+             (unless (eq? type 'integer)
+               (error "invalid integer read syntax" type str result))
+             `(integer . ,(string->integer str result)))))
+        (else
+         (lexer-error port "invalid circular marker character" last-ch))))))
 
 ;;; Main lexer routine, which is given a port and does look for the next
 ;;; token.
@@ -334,13 +366,21 @@
           (case c
             ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
              (unread-char c port)
-             (let ((mark (get-circular-marker port)))
+             (let ((mark (get-circular-marker-or-number port)))
                (return (car mark) (cdr mark))))
             ((#\')
              (return 'function #f))
+            ((#\b #\o #\x)
+             (let ((radix (case c ((#\b) 2) ((#\o) 8) ((#\x) 16))))
+               (call-with-values
+                   (lambda () (get-symbol-or-number port radix))
+                 (lambda (type str)
+                   (unless (eq? type 'integer)
+                     (error "invalid integer read syntax" type str radix))
+                   (return 'integer (string->integer str radix))))))
             ((#\:)
              (call-with-values
-                 (lambda () (get-symbol-or-number port))
+                 (lambda () (get-symbol-or-number port #f))
                (lambda (type str)
                  (return 'symbol (make-symbol str))))))))
         ;; Parentheses and other special-meaning single characters.
@@ -363,7 +403,7 @@
         (else
          (unread-char c port)
          (call-with-values
-             (lambda () (get-symbol-or-number port))
+             (lambda () (get-symbol-or-number port 10))
            (lambda (type str)
              (case type
                ((symbol)
@@ -384,12 +424,7 @@
                 ;; string->number returns an inexact real.  Thus we need
                 ;; a conversion here, but it should always result in an
                 ;; integer!
-                (return
-                 'integer
-                 (let ((num (inexact->exact (string->number str))))
-                   (if (not (integer? num))
-                       (error "expected integer" str num))
-                   num)))
+                (return 'integer (string->integer str 10)))
                ((float)
                 (return 'float (let ((num (string->number str)))
                                  (if (exact? num)
diff --git a/test-suite/tests/elisp-reader.test b/test-suite/tests/elisp-reader.test
index 669c4d592..b84e4756f 100644
--- a/test-suite/tests/elisp-reader.test
+++ b/test-suite/tests/elisp-reader.test
@@ -75,21 +75,45 @@
               (symbol . abc) (paren-open . #f) (symbol . def)
               (paren-close . #f) (symbol . ghi) (symbol . .e5))))
 
-  ; Here we make use of the property that exact/inexact numbers are not equal?
-  ; even when they have the same numeric value!
-  (pass-if "integers (decimal)"
-    (equal? (lex-string "-1 1 1. +1 01234")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1)
-              (integer . 1234))))
-  (pass-if "integers (binary)"
-    (equal? (lex-string "#b-1 #b1 #b+1 #b10011010010")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
-  (pass-if "integers (octal)"
-    (equal? (lex-string "#o-1 #o1 #o+1 #o2322")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
-  (pass-if "integers (hexadecimal)"
-    (equal? (lex-string "#x-1 #x1 #x+1 #x4d2")
-            '((integer . -1) (integer . 1) (integer . 1) (integer . 1234))))
+  (with-test-prefix "integers"
+    ;; Here we make use of the property that exact/inexact numbers are not equal?
+    ;; even when they have the same numeric value!
+
+    (with-test-prefix "decimal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "-1"))
+      (pass-if-equal "trailing dot" '((integer . 1)) (lex-string "1."))
+      (pass-if-equal "all digits"
+          '((integer . 123456789)) (lex-string "0123456789")))
+
+    (with-test-prefix "binary"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#b1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#b+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#b-1"))
+      (pass-if-equal "all digits"
+          '((integer . #b010101)) (lex-string "#b010101")))
+
+    (with-test-prefix "octal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#o1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#o+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#o-1"))
+      (pass-if-equal "all digits"
+          '((integer . #o01234567)) (lex-string "#o01234567")))
+
+    (with-test-prefix "hexadecimal"
+      (pass-if-equal "simple" '((integer . 1)) (lex-string "#x1"))
+      (pass-if-equal "positive" '((integer . 1)) (lex-string "#x+1"))
+      (pass-if-equal "negative" '((integer . -1)) (lex-string "#x-1"))
+      (pass-if-equal "all digits"
+          '((integer . #x0123456789abcdef)) (lex-string "#x0123456789aBcDeF")))
+
+    (with-test-prefix "arbitrary radix"
+      (pass-if-equal "simple" '((integer . 44)) (lex-string "#24r1k"))
+      (pass-if-equal "positive" '((integer . 44)) (lex-string "#24r+1k"))
+      (pass-if-equal "negative" '((integer . -44)) (lex-string "#24r-1k"))
+      (pass-if-equal "max radix" '((integer . 35)) (lex-string "#36rz"))))
+
   (pass-if "floats"
     (equal? (lex-string "1500.0 15e2 15.e2 1.5e3 .15e4 -.345e-2")
             '((float . 1500.0) (float . 1500.0) (float . 1500.0)

                 reply	other threads:[~2021-05-31 12:04 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87lf7v6rdx.fsf@ya.ru \
    --to=ivan-p-sokolov@ya.ru \
    --cc=48758@debbugs.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).