unofficial mirror of guile-devel@gnu.org 
 help / color / mirror / Atom feed
* PEG Patches
@ 2011-03-06  5:25 Noah Lavine
  2011-03-07  1:28 ` Noah Lavine
  2011-03-25 18:06 ` Andy Wingo
  0 siblings, 2 replies; 10+ messages in thread
From: Noah Lavine @ 2011-03-06  5:25 UTC (permalink / raw)
  To: guile-devel

[-- Attachment #1: Type: text/plain, Size: 1208 bytes --]

Hello all,

Attached is a series of patches I've made for the wip-mlucy branch. It
splits the PEG code into several little modules which go in
module/ice-9/peg/. The original peg source file becomes very little.
At the end it finally loses its big eval-when wrapper.

There's one part of this that I'm not satisfied with, which is that
define-nonterminal goes into module/ice-9/peg/string-peg.scm, which is
supposed to be solely for pegs-as-strings. The reason for this is that
I got compiler errors if I didn't do this, and I couldn't figure out
how to stop them. I would appreciate it if someone would take a look
and try to find what I missed.

Also, a note about future ideas - the current PEG code can only parse
strings. However, there is almost nothing string-specific about the
parsing code - just a few calls to string-ref and substring in
codegen.scm. I'd like to see this extended to parse vectors filled
with arbitrary objects. This would let you use a tokenizer with it,
which is the easiest way to implement C correctly, and also probably
the easiest way to store line number information with tokens, which is
necessary for ultimately giving good error messages from PEG parsers.

Thanks,
Noah

[-- Attachment #2: 0001-Split-peg.scm.patch --]
[-- Type: application/octet-stream, Size: 19991 bytes --]

From a73cfb546fc6c044af71f0d48a63f061bca2f740 Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sat, 5 Mar 2011 15:23:59 -0500
Subject: [PATCH 1/7] Split peg.scm

 * module/ice-9/peg.scm: move code generators to new module
 * module/ice-9/peg/codegen.scm: new module for PEG code generators
---
 module/ice-9/peg.scm         |  223 +-------------------------------------
 module/ice-9/peg/codegen.scm |  245 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 250 insertions(+), 218 deletions(-)
 create mode 100644 module/ice-9/peg/codegen.scm

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index d91a74e..0acc459 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -18,8 +18,7 @@
 ;;;;
 
 (define-module (ice-9 peg)
-  #:export (peg-sexp-compile
-            peg-string-compile
+  #:export (peg-string-compile
             context-flatten
             peg-parse
             define-nonterm
@@ -34,8 +33,9 @@
             peg:substring
             peg-record?
             keyword-flatten)
-  #:use-module (system base pmatch)
-  #:use-module (ice-9 pretty-print))
+  #:use-module (ice-9 peg codegen)
+  #:re-export (peg-sexp-compile)
+  #:use-module (system base pmatch))
 
 ;;;
 ;;; Helper Macros
@@ -58,222 +58,9 @@ execute the STMTs and try again."
        ((_) #t)
        (else #f)))))
 
-(define-syntax push!
-  (syntax-rules ()
-    "Push an object onto a list."
-    ((_ lst obj)
-     (set! lst (cons obj lst)))))
-
-(define-syntax single-filter
-  (syntax-rules ()
-    "If EXP is a list of one element, return the element.  Otherwise
-return EXP."
-    ((_ exp)
-     (pmatch exp
-       ((,elt) elt)
-       (,elts elts)))))
-
-(define-syntax push-not-null!
-  (syntax-rules ()
-    "If OBJ is non-null, push it onto LST, otherwise do nothing."
-    ((_ lst obj)
-     (if (not (null? obj))
-         (push! lst obj)))))
-
 (eval-when (compile load eval)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; CODE GENERATORS
-;; These functions generate scheme code for parsing PEGs.
-;; Conventions:
-;;   accum: (all name body none)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Code we generate will have a certain return structure depending on how we're
-;; accumulating (the ACCUM variable).
-(define (cg-generic-ret accum name body-uneval at)
-  ;; name, body-uneval and at are syntax
-  #`(let ((body #,body-uneval))
-     #,(cond
-        ((and (eq? accum 'all) name)
-         #`(list #,at
-                 (cond
-                  ((not (list? body)) (list '#,name body))
-                  ((null? body) '#,name)
-                  ((symbol? (car body)) (list '#,name body))
-                  (else (cons '#,name body)))))
-        ((eq? accum 'name)
-         #`(list #,at '#,name))
-        ((eq? accum 'body)
-         #`(list #,at
-                 (cond
-                  ((single? body) (car body))
-                  (else body))))
-        ((eq? accum 'none)
-         #`(list #,at '()))
-        (else
-         (begin
-           (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
-           (pretty-print "Defaulting to accum of none.\n")
-           #`(list #,at '()))))))
-
-;; The short name makes the formatting below much easier to read.
-(define cggr cg-generic-ret)
-
-;; Generates code that matches a particular string.
-;; E.g.: (cg-string syntax "abc" 'body)
-(define (cg-string pat accum)
-  (let ((plen (string-length pat)))
-    #`(lambda (str len pos)
-        (let ((end (+ pos #,plen)))
-          (and (<= end len)
-               (string= str #,pat pos end)
-               #,(case accum
-                   ((all) #`(list end (list 'cg-string #,pat)))
-                   ((name) #`(list end 'cg-string))
-                   ((body) #`(list end #,pat))
-                   ((none) #`(list end '()))
-                   (else (error "bad accum" accum))))))))
-
-;; Generates code for matching any character.
-;; E.g.: (cg-peg-any syntax 'body)
-(define (cg-peg-any accum)
-  #`(lambda (str len pos)
-      (and (< pos len)
-           #,(case accum
-               ((all) #`(list (1+ pos)
-                              (list 'cg-peg-any (substring str pos (1+ pos)))))
-               ((name) #`(list (1+ pos) 'cg-peg-any))
-               ((body) #`(list (1+ pos) (substring str pos (1+ pos))))
-               ((none) #`(list (1+ pos) '()))
-               (else (error "bad accum" accum))))))
-
-;; Generates code for matching a range of characters between start and end.
-;; E.g.: (cg-range syntax #\a #\z 'body)
-(define (cg-range start end accum)
-  #`(lambda (str len pos)
-      (and (< pos len)
-           (let ((c (string-ref str pos)))
-             (and (char>=? c #,start)
-                  (char<=? c #,end)
-                  #,(case accum
-                      ((all) #`(list (1+ pos) (list 'cg-range (string c))))
-                      ((name) #`(list (1+ pos) 'cg-range))
-                      ((body) #`(list (1+ pos) (string c)))
-                      ((none) #`(list (1+ pos) '()))
-                      (else (error "bad accum" accum))))))))
-
-;; Filters the accum argument to peg-sexp-compile for buildings like string
-;; literals (since we don't want to tag them with their name if we're doing an
-;; "all" accum).
-(define (builtin-accum-filter accum)
-  (cond
-   ((eq? accum 'all) 'body)
-   ((eq? accum 'name) 'name)
-   ((eq? accum 'body) 'body)
-   ((eq? accum 'none) 'none)))
-(define baf builtin-accum-filter)
-
-;; Takes an arbitrary expressions and accumulation variable, then parses it.
-;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
-(define (peg-sexp-compile pat accum)
-  (syntax-case pat (peg-any range ignore capture peg and or body)
-    (peg-any
-     (cg-peg-any (baf accum)))
-    (sym (identifier? #'sym) ;; nonterminal
-     #'sym)
-    (str (string? (syntax->datum #'str)) ;; literal string
-     (cg-string (syntax->datum #'str) (baf accum)))
-    ((range start end) ;; range of characters (e.g. [a-z])
-     (and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
-     (cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
-    ((ignore pat) ;; match but don't parse
-     (peg-sexp-compile #'pat 'none))
-    ((capture pat) ;; parse
-     (peg-sexp-compile #'pat 'body))
-    ((peg pat)  ;; embedded PEG string
-     (string? (syntax->datum #'pat))
-     (peg-string-compile #'pat (baf accum)))
-    ((and pat ...)
-     (cg-and #'(pat ...) (baf accum)))
-    ((or pat ...)
-     (cg-or #'(pat ...) (baf accum)))
-    ((body type pat num)
-     (cg-body (baf accum) #'type #'pat #'num))))
-
-;; Top-level function builder for AND.  Reduces to a call to CG-AND-INT.
-(define (cg-and clauses accum)
-  #`(lambda (str len pos)
-      (let ((body '()))
-        #,(cg-and-int clauses accum #'str #'len #'pos #'body))))
-
-;; Internal function builder for AND (calls itself).
-(define (cg-and-int clauses accum str strlen at body)
-  (syntax-case clauses ()
-    (()
-     (cggr accum 'cg-and #`(reverse #,body) at))
-    ((first rest ...)
-     #`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
-         (and res 
-              ;; update AT and BODY then recurse
-              (let ((newat (car res))
-                    (newbody (cadr res)))
-                (set! #,at newat)
-                (push-not-null! #,body (single-filter newbody))
-                #,(cg-and-int #'(rest ...) accum str strlen at body)))))))
-
-;; Top-level function builder for OR.  Reduces to a call to CG-OR-INT.
-(define (cg-or clauses accum)
-  #`(lambda (str len pos)
-      #,(cg-or-int clauses accum #'str #'len #'pos)))
-
-;; Internal function builder for OR (calls itself).
-(define (cg-or-int clauses accum str strlen at)
-  (syntax-case clauses ()
-    (()
-     #f)
-    ((first rest ...)
-     #`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
-           #,(cg-or-int #'(rest ...) accum str strlen at)))))
-
-;; Returns a function that parses a BODY element.
-(define (cg-body accum type pat num)
-  #`(lambda (str strlen at)
-      (let ((body '()))
-        (let lp ((end at) (count 0))
-          (let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
-                 (new-end (if match (car match) end))
-                 (count (if (> new-end end) (1+ count) count)))
-            (if (> new-end end)
-                (push-not-null! body (single-filter (cadr match))))
-            (if (and (> new-end end)
-                     #,(syntax-case num (+ * ?)
-                         (n (number? (syntax->datum #'n))
-                            #'(< count n))
-                         (+ #t)
-                         (* #t)
-                         (? #'(< count 1))))
-                (lp new-end count)
-                (let ((success #,(syntax-case num (+ * ?)
-                                   (n (number? (syntax->datum #'n))
-                                      #'(= count n))
-                                   (+ #'(>= count 1))
-                                   (* #t)
-                                   (? #t))))
-                  #,(syntax-case type (! & lit)
-                      (!
-                       #`(if success
-                             #f
-                             #,(cggr accum 'cg-body #''() #'at)))
-                      (&
-                       #`(and success
-                              #,(cggr accum 'cg-body #''() #'at)))
-                      (lit
-                       #`(and success
-                              #,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;; FOR DEFINING AND USING NONTERMINALS
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -511,7 +298,7 @@ RB < ']'
   (let ((parsed (peg-parse peg-grammar str)))
     (if (not parsed)
         (begin
-          ;; (pretty-print "Invalid PEG grammar!\n")
+          ;; (display "Invalid PEG grammar!\n")
           #f)
         (let ((lst (peg:tree parsed)))
           (cond
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
new file mode 100644
index 0000000..43f44cc
--- /dev/null
+++ b/module/ice-9/peg/codegen.scm
@@ -0,0 +1,245 @@
+;;;; codegen.scm --- code generation for composable parsers
+;;;;
+;;;; 	Copyright (C) 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg codegen)
+  #:export (peg-sexp-compile)
+  #:use-module (ice-9 peg)
+  #:use-module (ice-9 pretty-print)
+  #:use-module (system base pmatch))
+
+(define-syntax single?
+  (syntax-rules ()
+    "Return #t if X is a list of one element."
+    ((_ x)
+     (pmatch x
+       ((_) #t)
+       (else #f)))))
+
+(define-syntax single-filter
+  (syntax-rules ()
+    "If EXP is a list of one element, return the element.  Otherwise
+return EXP."
+    ((_ exp)
+     (pmatch exp
+       ((,elt) elt)
+       (,elts elts)))))
+
+(define-syntax push-not-null!
+  (syntax-rules ()
+    "If OBJ is non-null, push it onto LST, otherwise do nothing."
+    ((_ lst obj)
+     (if (not (null? obj))
+         (push! lst obj)))))
+
+(define-syntax push!
+  (syntax-rules ()
+    "Push an object onto a list."
+    ((_ lst obj)
+     (set! lst (cons obj lst)))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; CODE GENERATORS
+;; These functions generate scheme code for parsing PEGs.
+;; Conventions:
+;;   accum: (all name body none)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Code we generate will have a certain return structure depending on how we're
+;; accumulating (the ACCUM variable).
+(define (cg-generic-ret accum name body-uneval at)
+  ;; name, body-uneval and at are syntax
+  #`(let ((body #,body-uneval))
+     #,(cond
+        ((and (eq? accum 'all) name)
+         #`(list #,at
+                 (cond
+                  ((not (list? body)) (list '#,name body))
+                  ((null? body) '#,name)
+                  ((symbol? (car body)) (list '#,name body))
+                  (else (cons '#,name body)))))
+        ((eq? accum 'name)
+         #`(list #,at '#,name))
+        ((eq? accum 'body)
+         #`(list #,at
+                 (cond
+                  ((single? body) (car body))
+                  (else body))))
+        ((eq? accum 'none)
+         #`(list #,at '()))
+        (else
+         (begin
+           (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
+           (pretty-print "Defaulting to accum of none.\n")
+           #`(list #,at '()))))))
+
+;; The short name makes the formatting below much easier to read.
+(define cggr cg-generic-ret)
+
+;; Generates code that matches a particular string.
+;; E.g.: (cg-string syntax "abc" 'body)
+(define (cg-string pat accum)
+  (let ((plen (string-length pat)))
+    #`(lambda (str len pos)
+        (let ((end (+ pos #,plen)))
+          (and (<= end len)
+               (string= str #,pat pos end)
+               #,(case accum
+                   ((all) #`(list end (list 'cg-string #,pat)))
+                   ((name) #`(list end 'cg-string))
+                   ((body) #`(list end #,pat))
+                   ((none) #`(list end '()))
+                   (else (error "bad accum" accum))))))))
+
+;; Generates code for matching any character.
+;; E.g.: (cg-peg-any syntax 'body)
+(define (cg-peg-any accum)
+  #`(lambda (str len pos)
+      (and (< pos len)
+           #,(case accum
+               ((all) #`(list (1+ pos)
+                              (list 'cg-peg-any (substring str pos (1+ pos)))))
+               ((name) #`(list (1+ pos) 'cg-peg-any))
+               ((body) #`(list (1+ pos) (substring str pos (1+ pos))))
+               ((none) #`(list (1+ pos) '()))
+               (else (error "bad accum" accum))))))
+
+;; Generates code for matching a range of characters between start and end.
+;; E.g.: (cg-range syntax #\a #\z 'body)
+(define (cg-range start end accum)
+  #`(lambda (str len pos)
+      (and (< pos len)
+           (let ((c (string-ref str pos)))
+             (and (char>=? c #,start)
+                  (char<=? c #,end)
+                  #,(case accum
+                      ((all) #`(list (1+ pos) (list 'cg-range (string c))))
+                      ((name) #`(list (1+ pos) 'cg-range))
+                      ((body) #`(list (1+ pos) (string c)))
+                      ((none) #`(list (1+ pos) '()))
+                      (else (error "bad accum" accum))))))))
+
+;; Filters the accum argument to peg-sexp-compile for buildings like string
+;; literals (since we don't want to tag them with their name if we're doing an
+;; "all" accum).
+(define (builtin-accum-filter accum)
+  (cond
+   ((eq? accum 'all) 'body)
+   ((eq? accum 'name) 'name)
+   ((eq? accum 'body) 'body)
+   ((eq? accum 'none) 'none)))
+(define baf builtin-accum-filter)
+
+;; Takes an arbitrary expressions and accumulation variable, then parses it.
+;; E.g.: (peg-sexp-compile syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
+(define (peg-sexp-compile pat accum)
+  (syntax-case pat (peg-any range ignore capture peg and or body)
+    (peg-any
+     (cg-peg-any (baf accum)))
+    (sym (identifier? #'sym) ;; nonterminal
+     #'sym)
+    (str (string? (syntax->datum #'str)) ;; literal string
+     (cg-string (syntax->datum #'str) (baf accum)))
+    ((range start end) ;; range of characters (e.g. [a-z])
+     (and (char? (syntax->datum #'start)) (char? (syntax->datum #'end)))
+     (cg-range (syntax->datum #'start) (syntax->datum #'end) (baf accum)))
+    ((ignore pat) ;; match but don't parse
+     (peg-sexp-compile #'pat 'none))
+    ((capture pat) ;; parse
+     (peg-sexp-compile #'pat 'body))
+    ((peg pat)  ;; embedded PEG string
+     (string? (syntax->datum #'pat))
+     (peg-string-compile #'pat (baf accum)))
+    ((and pat ...)
+     (cg-and #'(pat ...) (baf accum)))
+    ((or pat ...)
+     (cg-or #'(pat ...) (baf accum)))
+    ((body type pat num)
+     (cg-body (baf accum) #'type #'pat #'num))))
+
+;; Top-level function builder for AND.  Reduces to a call to CG-AND-INT.
+(define (cg-and clauses accum)
+  #`(lambda (str len pos)
+      (let ((body '()))
+        #,(cg-and-int clauses accum #'str #'len #'pos #'body))))
+
+;; Internal function builder for AND (calls itself).
+(define (cg-and-int clauses accum str strlen at body)
+  (syntax-case clauses ()
+    (()
+     (cggr accum 'cg-and #`(reverse #,body) at))
+    ((first rest ...)
+     #`(let ((res (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)))
+         (and res 
+              ;; update AT and BODY then recurse
+              (let ((newat (car res))
+                    (newbody (cadr res)))
+                (set! #,at newat)
+                (push-not-null! #,body (single-filter newbody))
+                #,(cg-and-int #'(rest ...) accum str strlen at body)))))))
+
+;; Top-level function builder for OR.  Reduces to a call to CG-OR-INT.
+(define (cg-or clauses accum)
+  #`(lambda (str len pos)
+      #,(cg-or-int clauses accum #'str #'len #'pos)))
+
+;; Internal function builder for OR (calls itself).
+(define (cg-or-int clauses accum str strlen at)
+  (syntax-case clauses ()
+    (()
+     #f)
+    ((first rest ...)
+     #`(or (#,(peg-sexp-compile #'first accum) #,str #,strlen #,at)
+           #,(cg-or-int #'(rest ...) accum str strlen at)))))
+
+;; Returns a function that parses a BODY element.
+(define (cg-body accum type pat num)
+  #`(lambda (str strlen at)
+      (let ((body '()))
+        (let lp ((end at) (count 0))
+          (let* ((match (#,(peg-sexp-compile pat accum) str strlen end))
+                 (new-end (if match (car match) end))
+                 (count (if (> new-end end) (1+ count) count)))
+            (if (> new-end end)
+                (push-not-null! body (single-filter (cadr match))))
+            (if (and (> new-end end)
+                     #,(syntax-case num (+ * ?)
+                         (n (number? (syntax->datum #'n))
+                            #'(< count n))
+                         (+ #t)
+                         (* #t)
+                         (? #'(< count 1))))
+                (lp new-end count)
+                (let ((success #,(syntax-case num (+ * ?)
+                                   (n (number? (syntax->datum #'n))
+                                      #'(= count n))
+                                   (+ #'(>= count 1))
+                                   (* #t)
+                                   (? #t))))
+                  #,(syntax-case type (! & lit)
+                      (!
+                       #`(if success
+                             #f
+                             #,(cggr accum 'cg-body #''() #'at)))
+                      (&
+                       #`(and success
+                              #,(cggr accum 'cg-body #''() #'at)))
+                      (lit
+                       #`(and success
+                              #,(cggr accum 'cg-body #'(reverse body) #'new-end)))))))))))
-- 
1.7.4.1


[-- Attachment #3: 0002-Rename-in-peg.scm.patch --]
[-- Type: application/octet-stream, Size: 4011 bytes --]

From 3f76e95b6a8b7cfe72fd6a328bb94f621b9e248e Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sat, 5 Mar 2011 16:23:05 -0500
Subject: [PATCH 2/7] Rename in peg.scm

 * module/ice-9/peg.scm: rename peg-parse-* functions to avoid confusion
    with what PEGs do.
---
 module/ice-9/peg.scm |   36 ++++++++++++++++++------------------
 1 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 0acc459..e256c2d 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -305,7 +305,7 @@ RB < ']'
            ((or (not (list? lst)) (null? lst))
             lst)
            ((eq? (car lst) 'peg-grammar)
-            (cons 'begin (map (lambda (x) (peg-parse-nonterm x))
+            (cons 'begin (map (lambda (x) (peg-nonterm->defn x))
                               (context-flatten (lambda (lst) (<= (depth lst) 2))
                                           (cdr lst))))))))))
 
@@ -319,7 +319,7 @@ RB < ']'
 (define define-grammar-f peg-parser)
 
 ;; Parse a nonterminal and pattern listed in LST.
-(define (peg-parse-nonterm lst)
+(define (peg-nonterm->defn lst)
   (let ((nonterm (car lst))
         (grabber (cadr lst))
         (pattern (caddr lst)))
@@ -328,23 +328,23 @@ RB < ']'
          ((string=? grabber "<--") 'all)
          ((string=? grabber "<-") 'body)
          (else 'none))
-       ,(compressor (peg-parse-pattern pattern)))))
+       ,(compressor (peg-pattern->defn pattern)))))
 
 ;; Parse a pattern.
-(define (peg-parse-pattern lst)
-  (cons 'or (map peg-parse-alternative
+(define (peg-pattern->defn lst)
+  (cons 'or (map peg-alternative->defn
                  (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
                              (cdr lst)))))
 
 ;; Parse an alternative.
-(define (peg-parse-alternative lst)
-  (cons 'and (map peg-parse-body
+(define (peg-alternative->defn lst)
+  (cons 'and (map peg-body->defn
                   (context-flatten (lambda (x) (or (string? (car x))
                                               (eq? (car x) 'peg-suffix)))
                               (cdr lst)))))
 
 ;; Parse a body.
-(define (peg-parse-body lst)
+(define (peg-body->defn lst)
   (let ((suffix '())
         (front 'lit))
     (cond
@@ -354,41 +354,41 @@ RB < ']'
       (begin (set! front (string->symbol (car lst)))
              (set! suffix (cadr lst))))
      (else `(peg-parse-body-fail ,lst)))
-    `(body ,front ,@(peg-parse-suffix suffix))))
+    `(body ,front ,@(peg-suffix->defn suffix))))
 
 ;; Parse a suffix.
-(define (peg-parse-suffix lst)
-  (list (peg-parse-primary (cadr lst))
+(define (peg-suffix->defn lst)
+  (list (peg-primary->defn (cadr lst))
         (if (null? (cddr lst))
             1
             (string->symbol (caddr lst)))))
 
 ;; Parse a primary.
-(define (peg-parse-primary lst)
+(define (peg-primary->defn lst)
   (let ((el (cadr lst)))
   (cond
    ((list? el)
     (cond
      ((eq? (car el) 'peg-literal)
-      (peg-parse-literal el))
+      (peg-literal->defn el))
      ((eq? (car el) 'peg-charclass)
-      (peg-parse-charclass el))
+      (peg-charclass->defn el))
      ((eq? (car el) 'peg-nonterminal)
       (string->symbol (cadr el)))))
    ((string? el)
     (cond
      ((equal? el "(")
-      (peg-parse-pattern (caddr lst)))
+      (peg-pattern->defn (caddr lst)))
      ((equal? el ".")
       'peg-any)
      (else `(peg-parse-any unknown-string ,lst))))
    (else `(peg-parse-any unknown-el ,lst)))))
 
 ;; Parses a literal.
-(define (peg-parse-literal lst) (trim-1chars (cadr lst)))
+(define (peg-literal->defn lst) (trim-1chars (cadr lst)))
 
 ;; Parses a charclass.
-(define (peg-parse-charclass lst)
+(define (peg-charclass->defn lst)
   (cons 'or
         (map
          (lambda (cc)
@@ -423,7 +423,7 @@ RB < ']'
    (datum->syntax
     str-stx
     (compressor
-     (peg-parse-pattern
+     (peg-pattern->defn
       (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))))))
    accum))
 
-- 
1.7.4.1


[-- Attachment #4: 0003-Make-Macros-Hygienic.patch --]
[-- Type: application/octet-stream, Size: 8354 bytes --]

From 6a8c63da95728dc33ca5984328d9747fbd067e26 Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sat, 5 Mar 2011 22:37:11 -0500
Subject: [PATCH 3/7] Make Macros Hygienic

 * modules/ice-9/peg.scm: convert the unhygienic macros that generate code
    for string PEGs to use hygiene.
---
 module/ice-9/peg.scm |  129 ++++++++++++++++++++++++++++----------------------
 1 files changed, 73 insertions(+), 56 deletions(-)

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index e256c2d..9bf152c 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -294,7 +294,7 @@ RB < ']'
 
 ;; Pakes a string representing a PEG grammar and defines all the nonterminals in
 ;; it as the associated PEGs.
-(define (peg-parser str)
+(define (peg-parser str for-syntax)
   (let ((parsed (peg-parse peg-grammar str)))
     (if (not parsed)
         (begin
@@ -305,9 +305,10 @@ RB < ']'
            ((or (not (list? lst)) (null? lst))
             lst)
            ((eq? (car lst) 'peg-grammar)
-            (cons 'begin (map (lambda (x) (peg-nonterm->defn x))
-                              (context-flatten (lambda (lst) (<= (depth lst) 2))
-                                          (cdr lst))))))))))
+            #`(begin
+                #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
+                        (context-flatten (lambda (lst) (<= (depth lst) 2))
+                                         (cdr lst))))))))))
 
 ;; Macro wrapper for PEG-PARSER.  Parses PEG grammars expressed as strings and
 ;; defines all the appropriate nonterminals.
@@ -315,88 +316,101 @@ RB < ']'
   (lambda (x)
     (syntax-case x ()
       ((_ str)
-       (datum->syntax x (peg-parser (syntax->datum #'str)))))))
+       (peg-parser (syntax->datum #'str) x)))))
 (define define-grammar-f peg-parser)
 
 ;; Parse a nonterminal and pattern listed in LST.
-(define (peg-nonterm->defn lst)
-  (let ((nonterm (car lst))
-        (grabber (cadr lst))
-        (pattern (caddr lst)))
-    `(define-nonterm ,(string->symbol (cadr nonterm))
-       ,(cond
-         ((string=? grabber "<--") 'all)
-         ((string=? grabber "<-") 'body)
-         (else 'none))
-       ,(compressor (peg-pattern->defn pattern)))))
+(define (peg-nonterm->defn lst for-syntax)
+  (let* ((nonterm (car lst))
+         (grabber (cadr lst))
+         (pattern (caddr lst))
+         (nonterm-name (datum->syntax for-syntax
+                                      (string->symbol (cadr nonterm)))))
+    #`(define-nonterm #,nonterm-name
+       #,(cond
+          ((string=? grabber "<--") (datum->syntax for-syntax 'all))
+          ((string=? grabber "<-") (datum->syntax for-syntax 'body))
+          (else (datum->syntax for-syntax 'none)))
+       #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
 
 ;; Parse a pattern.
-(define (peg-pattern->defn lst)
-  (cons 'or (map peg-alternative->defn
-                 (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
-                             (cdr lst)))))
+(define (peg-pattern->defn lst for-syntax)
+  #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
+                (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
+                                 (cdr lst)))))
 
 ;; Parse an alternative.
-(define (peg-alternative->defn lst)
-  (cons 'and (map peg-body->defn
-                  (context-flatten (lambda (x) (or (string? (car x))
-                                              (eq? (car x) 'peg-suffix)))
-                              (cdr lst)))))
+(define (peg-alternative->defn lst for-syntax)
+  #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
+                 (context-flatten (lambda (x) (or (string? (car x))
+                                             (eq? (car x) 'peg-suffix)))
+                                  (cdr lst)))))
 
 ;; Parse a body.
-(define (peg-body->defn lst)
+(define (peg-body->defn lst for-syntax)
   (let ((suffix '())
-        (front 'lit))
+        (front (datum->syntax for-syntax 'lit)))
     (cond
      ((eq? (car lst) 'peg-suffix)
       (set! suffix lst))
      ((string? (car lst))
-      (begin (set! front (string->symbol (car lst)))
+      (begin (set! front (datum->syntax for-syntax
+                                        (string->symbol (car lst))))
              (set! suffix (cadr lst))))
      (else `(peg-parse-body-fail ,lst)))
-    `(body ,front ,@(peg-suffix->defn suffix))))
+    #`(body #,front #,@(peg-suffix->defn
+                        suffix
+                        for-syntax))))
 
 ;; Parse a suffix.
-(define (peg-suffix->defn lst)
-  (list (peg-primary->defn (cadr lst))
-        (if (null? (cddr lst))
-            1
-            (string->symbol (caddr lst)))))
+(define (peg-suffix->defn lst for-syntax)
+  #`(#,(peg-primary->defn (cadr lst) for-syntax)
+     #,(if (null? (cddr lst))
+           1
+           (datum->syntax for-syntax (string->symbol (caddr lst))))))
 
 ;; Parse a primary.
-(define (peg-primary->defn lst)
+(define (peg-primary->defn lst for-syntax)
   (let ((el (cadr lst)))
   (cond
    ((list? el)
     (cond
      ((eq? (car el) 'peg-literal)
-      (peg-literal->defn el))
+      (peg-literal->defn el for-syntax))
      ((eq? (car el) 'peg-charclass)
-      (peg-charclass->defn el))
+      (peg-charclass->defn el for-syntax))
      ((eq? (car el) 'peg-nonterminal)
-      (string->symbol (cadr el)))))
+      (datum->syntax for-syntax (string->symbol (cadr el))))))
    ((string? el)
     (cond
      ((equal? el "(")
-      (peg-pattern->defn (caddr lst)))
+      (peg-pattern->defn (caddr lst) for-syntax))
      ((equal? el ".")
-      'peg-any)
-     (else `(peg-parse-any unknown-string ,lst))))
-   (else `(peg-parse-any unknown-el ,lst)))))
+      (datum->syntax for-syntax 'peg-any))
+     (else (datum->syntax for-syntax
+                          `(peg-parse-any unknown-string ,lst)))))
+   (else (datum->syntax for-syntax
+                        `(peg-parse-any unknown-el ,lst))))))
 
 ;; Parses a literal.
-(define (peg-literal->defn lst) (trim-1chars (cadr lst)))
+(define (peg-literal->defn lst for-syntax)
+  (datum->syntax for-syntax (trim-1chars (cadr lst))))
 
 ;; Parses a charclass.
-(define (peg-charclass->defn lst)
-  (cons 'or
-        (map
+(define (peg-charclass->defn lst for-syntax)
+  #`(or
+     #,@(map
          (lambda (cc)
            (cond
             ((eq? (car cc) 'charclass-range)
-             `(range ,(string-ref (cadr cc) 0) ,(string-ref (cadr cc) 2)))
+             #`(range #,(datum->syntax
+                         for-syntax
+                         (string-ref (cadr cc) 0))
+                      #,(datum->syntax
+                         for-syntax
+                         (string-ref (cadr cc) 2))))
             ((eq? (car cc) 'charclass-single)
-             (cadr cc))))
+             (datum->syntax for-syntax (cadr cc)))))
          (context-flatten
           (lambda (x) (or (eq? (car x) 'charclass-range)
                           (eq? (car x) 'charclass-single)))
@@ -404,27 +418,30 @@ RB < ']'
 
 ;; Compresses a list to save the optimizer work.
 ;; e.g. (or (and a)) -> a
-(define (compressor lst)
+(define (compressor-core lst)
   (if (or (not (list? lst)) (null? lst))
       lst
       (cond
        ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
              (null? (cddr lst)))
-        (compressor (cadr lst)))
+        (compressor-core (cadr lst)))
        ((and (eq? (car lst) 'body)
              (eq? (cadr lst) 'lit)
              (eq? (cadddr lst) 1))
-        (compressor (caddr lst)))
-       (else (map compressor lst)))))
+        (compressor-core (caddr lst)))
+       (else (map compressor-core lst)))))
+
+(define (compressor syn for-syntax)
+  (datum->syntax for-syntax
+                 (compressor-core (syntax->datum syn))))
 
 ;; Builds a lambda-expressions for the pattern STR using accum.
 (define (peg-string-compile str-stx accum)
   (peg-sexp-compile
-   (datum->syntax
-    str-stx
-    (compressor
-     (peg-pattern->defn
-      (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))))))
+   (compressor
+    (peg-pattern->defn
+     (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
+    str-stx)
    accum))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-- 
1.7.4.1


[-- Attachment #5: 0004-Separate-PEG-Strings.patch --]
[-- Type: application/octet-stream, Size: 25441 bytes --]

From 0ce194f300136dc857fd06040ed451d00a992503 Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sat, 5 Mar 2011 23:54:50 -0500
Subject: [PATCH 4/7] Separate PEG Strings

 * module/ice-9/peg.scm: remove functions dealing with PEGs as strings
 * module/ice-9/peg/string-peg.scm: and put them here
---
 module/ice-9/peg.scm            |  291 ++------------------------------------
 module/ice-9/peg/codegen.scm    |    1 +
 module/ice-9/peg/string-peg.scm |  305 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 315 insertions(+), 282 deletions(-)
 create mode 100644 module/ice-9/peg/string-peg.scm

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 9bf152c..b96104a 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -18,14 +18,11 @@
 ;;;;
 
 (define-module (ice-9 peg)
-  #:export (peg-string-compile
-            context-flatten
+  #:export (context-flatten
             peg-parse
-            define-nonterm
-            define-nonterm-f
+;            define-nonterm
+;            define-nonterm-f
             peg-match
-            define-grammar
-            define-grammar-f
             peg:start
             peg:end
             peg:string
@@ -33,8 +30,13 @@
             peg:substring
             peg-record?
             keyword-flatten)
+;  #:export-syntax (define-nonterm)
   #:use-module (ice-9 peg codegen)
-  #:re-export (peg-sexp-compile)
+  #:use-module (ice-9 peg string-peg)
+  #:re-export (peg-sexp-compile
+               define-grammar
+               define-grammar-f
+               define-nonterm)
   #:use-module (system base pmatch))
 
 ;;;
@@ -64,62 +66,6 @@ execute the STMTs and try again."
 ;;;;; FOR DEFINING AND USING NONTERMINALS
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; The results of parsing using a nonterminal are cached.  Think of it like a
-;; hash with no conflict resolution.  Process for deciding on the cache size
-;; wasn't very scientific; just ran the benchmarks and stopped a little after
-;; the point of diminishing returns on my box.
-(define *cache-size* 512)
-
-(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
-;  (let ((matchf-syn (datum->syntax for-syntax matchf)))
-   #`(lambda (str strlen at)
-      (let ((res (#,matchf-syn str strlen at)))
-        ;; Try to match the nonterminal.
-        (if res
-            ;; If we matched, do some post-processing to figure out
-            ;; what data to propagate upward.
-            (let ((at (car res))
-                  (body (cadr res)))
-              #,(cond
-                 ((eq? accumsym 'name)
-                  #`(list at '#,s-syn))
-                 ((eq? accumsym 'all)
-                  #`(list (car res)
-                          (cond
-                           ((not (list? body))
-                            (list '#,s-syn body))
-                           ((null? body) '#,s-syn)
-                           ((symbol? (car body))
-                            (list '#,s-syn body))
-                           (else (cons '#,s-syn body)))))
-                 ((eq? accumsym 'none) #`(list (car res) '()))
-                 (else #`(begin res))))
-            ;; If we didn't match, just return false.
-            #f))))
-
-;; Defines a new nonterminal symbol accumulating with ACCUM.
-(define-syntax define-nonterm
-  (lambda (x)
-    (syntax-case x ()
-      ((_ sym accum pat)
-       (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
-             (accumsym (syntax->datum #'accum))
-             (c (datum->syntax x (gensym))));; the cache
-         ;; CODE is the code to parse the string if the result isn't cached.
-         (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
-           #`(begin
-               (define #,c (make-vector *cache-size* #f));; the cache
-               (define (sym str strlen at)
-                 (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
-                   ;; Check to see whether the value is cached.
-                   (if (and vref (eq? (car vref) str) (= (cadr vref) at))
-                       (caddr vref);; If it is return it.
-                       (let ((fres ;; Else calculate it and cache it.
-                              (#,syn str strlen at)))
-                         (vector-set! #,c (modulo at *cache-size*)
-                                      (list str at fres))
-                         fres)))))))))))
-
 ;; Parses STRING using NONTERM
 (define (peg-parse nonterm string)
   ;; We copy the string before using it because it might have been modified
@@ -225,225 +171,6 @@ execute the STMTs and try again."
          (member (car x) keyword-lst)))
    lst))
 
-;; Gets the left-hand depth of a list.
-(define (depth lst)
-  (if (or (not (list? lst)) (null? lst))
-      0
-      (+ 1 (depth (car lst)))))
-
-;; Trims characters off the front and end of STR.
-;; (trim-1chars "'ab'") -> "ab"
-(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; Parse string PEGs using sexp PEGs.
-;; See the variable PEG-AS-PEG for an easier-to-read syntax.
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Grammar for PEGs in PEG grammar.
-(define peg-as-peg
-"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
-pattern <-- alternative (SLASH sp alternative)*
-alternative <-- ([!&]? sp suffix)+
-suffix <-- primary ([*+?] sp)*
-primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
-literal <-- ['] (!['] .)* ['] sp
-charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
-CCrange <-- . '-' .
-CCsingle <-- .
-nonterminal <-- [a-zA-Z0-9-]+ sp
-sp < [ \t\n]*
-SLASH < '/'
-LB < '['
-RB < ']'
-")
-
-(define-nonterm peg-grammar all
-  (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
-(define-nonterm peg-pattern all
-  (and peg-alternative
-       (body lit (and (ignore "/") peg-sp peg-alternative) *)))
-(define-nonterm peg-alternative all
-  (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
-(define-nonterm peg-suffix all
-  (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
-(define-nonterm peg-primary all
-  (or (and "(" peg-sp peg-pattern ")" peg-sp)
-      (and "." peg-sp)
-      peg-literal
-      peg-charclass
-      (and peg-nonterminal (body ! "<" 1))))
-(define-nonterm peg-literal all
-  (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
-(define-nonterm peg-charclass all
-  (and (ignore "[")
-       (body lit (and (body ! "]" 1)
-                      (or charclass-range charclass-single)) *)
-       (ignore "]")
-       peg-sp))
-(define-nonterm charclass-range all (and peg-any "-" peg-any))
-(define-nonterm charclass-single all peg-any)
-(define-nonterm peg-nonterminal all
-  (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
-(define-nonterm peg-sp none
-  (body lit (or " " "\t" "\n") *))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; PARSE STRING PEGS
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Pakes a string representing a PEG grammar and defines all the nonterminals in
-;; it as the associated PEGs.
-(define (peg-parser str for-syntax)
-  (let ((parsed (peg-parse peg-grammar str)))
-    (if (not parsed)
-        (begin
-          ;; (display "Invalid PEG grammar!\n")
-          #f)
-        (let ((lst (peg:tree parsed)))
-          (cond
-           ((or (not (list? lst)) (null? lst))
-            lst)
-           ((eq? (car lst) 'peg-grammar)
-            #`(begin
-                #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
-                        (context-flatten (lambda (lst) (<= (depth lst) 2))
-                                         (cdr lst))))))))))
-
-;; Macro wrapper for PEG-PARSER.  Parses PEG grammars expressed as strings and
-;; defines all the appropriate nonterminals.
-(define-syntax define-grammar
-  (lambda (x)
-    (syntax-case x ()
-      ((_ str)
-       (peg-parser (syntax->datum #'str) x)))))
-(define define-grammar-f peg-parser)
-
-;; Parse a nonterminal and pattern listed in LST.
-(define (peg-nonterm->defn lst for-syntax)
-  (let* ((nonterm (car lst))
-         (grabber (cadr lst))
-         (pattern (caddr lst))
-         (nonterm-name (datum->syntax for-syntax
-                                      (string->symbol (cadr nonterm)))))
-    #`(define-nonterm #,nonterm-name
-       #,(cond
-          ((string=? grabber "<--") (datum->syntax for-syntax 'all))
-          ((string=? grabber "<-") (datum->syntax for-syntax 'body))
-          (else (datum->syntax for-syntax 'none)))
-       #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
-
-;; Parse a pattern.
-(define (peg-pattern->defn lst for-syntax)
-  #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
-                (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
-                                 (cdr lst)))))
-
-;; Parse an alternative.
-(define (peg-alternative->defn lst for-syntax)
-  #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
-                 (context-flatten (lambda (x) (or (string? (car x))
-                                             (eq? (car x) 'peg-suffix)))
-                                  (cdr lst)))))
-
-;; Parse a body.
-(define (peg-body->defn lst for-syntax)
-  (let ((suffix '())
-        (front (datum->syntax for-syntax 'lit)))
-    (cond
-     ((eq? (car lst) 'peg-suffix)
-      (set! suffix lst))
-     ((string? (car lst))
-      (begin (set! front (datum->syntax for-syntax
-                                        (string->symbol (car lst))))
-             (set! suffix (cadr lst))))
-     (else `(peg-parse-body-fail ,lst)))
-    #`(body #,front #,@(peg-suffix->defn
-                        suffix
-                        for-syntax))))
-
-;; Parse a suffix.
-(define (peg-suffix->defn lst for-syntax)
-  #`(#,(peg-primary->defn (cadr lst) for-syntax)
-     #,(if (null? (cddr lst))
-           1
-           (datum->syntax for-syntax (string->symbol (caddr lst))))))
-
-;; Parse a primary.
-(define (peg-primary->defn lst for-syntax)
-  (let ((el (cadr lst)))
-  (cond
-   ((list? el)
-    (cond
-     ((eq? (car el) 'peg-literal)
-      (peg-literal->defn el for-syntax))
-     ((eq? (car el) 'peg-charclass)
-      (peg-charclass->defn el for-syntax))
-     ((eq? (car el) 'peg-nonterminal)
-      (datum->syntax for-syntax (string->symbol (cadr el))))))
-   ((string? el)
-    (cond
-     ((equal? el "(")
-      (peg-pattern->defn (caddr lst) for-syntax))
-     ((equal? el ".")
-      (datum->syntax for-syntax 'peg-any))
-     (else (datum->syntax for-syntax
-                          `(peg-parse-any unknown-string ,lst)))))
-   (else (datum->syntax for-syntax
-                        `(peg-parse-any unknown-el ,lst))))))
-
-;; Parses a literal.
-(define (peg-literal->defn lst for-syntax)
-  (datum->syntax for-syntax (trim-1chars (cadr lst))))
-
-;; Parses a charclass.
-(define (peg-charclass->defn lst for-syntax)
-  #`(or
-     #,@(map
-         (lambda (cc)
-           (cond
-            ((eq? (car cc) 'charclass-range)
-             #`(range #,(datum->syntax
-                         for-syntax
-                         (string-ref (cadr cc) 0))
-                      #,(datum->syntax
-                         for-syntax
-                         (string-ref (cadr cc) 2))))
-            ((eq? (car cc) 'charclass-single)
-             (datum->syntax for-syntax (cadr cc)))))
-         (context-flatten
-          (lambda (x) (or (eq? (car x) 'charclass-range)
-                          (eq? (car x) 'charclass-single)))
-          (cdr lst)))))
-
-;; Compresses a list to save the optimizer work.
-;; e.g. (or (and a)) -> a
-(define (compressor-core lst)
-  (if (or (not (list? lst)) (null? lst))
-      lst
-      (cond
-       ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
-             (null? (cddr lst)))
-        (compressor-core (cadr lst)))
-       ((and (eq? (car lst) 'body)
-             (eq? (cadr lst) 'lit)
-             (eq? (cadddr lst) 1))
-        (compressor-core (caddr lst)))
-       (else (map compressor-core lst)))))
-
-(define (compressor syn for-syntax)
-  (datum->syntax for-syntax
-                 (compressor-core (syntax->datum syn))))
-
-;; Builds a lambda-expressions for the pattern STR using accum.
-(define (peg-string-compile str-stx accum)
-  (peg-sexp-compile
-   (compressor
-    (peg-pattern->defn
-     (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
-    str-stx)
-   accum))
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;; PMATCH STRUCTURE MUNGING
 ;; Pretty self-explanatory.
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index 43f44cc..2c85ccc 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -20,6 +20,7 @@
 (define-module (ice-9 peg codegen)
   #:export (peg-sexp-compile)
   #:use-module (ice-9 peg)
+  #:use-module (ice-9 peg string-peg)
   #:use-module (ice-9 pretty-print)
   #:use-module (system base pmatch))
 
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
new file mode 100644
index 0000000..f7e21f6
--- /dev/null
+++ b/module/ice-9/peg/string-peg.scm
@@ -0,0 +1,305 @@
+;;;; string-peg.scm --- representing PEG grammars as strings
+;;;;
+;;;; 	Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg string-peg)
+  #:export (peg-string-compile
+            peg-as-peg
+            define-grammar
+            define-grammar-f
+            define-nonterm
+            peg-grammar)
+  #:use-module (ice-9 peg)
+  #:use-module (ice-9 peg codegen))
+
+;; The results of parsing using a nonterminal are cached.  Think of it like a
+;; hash with no conflict resolution.  Process for deciding on the cache size
+;; wasn't very scientific; just ran the benchmarks and stopped a little after
+;; the point of diminishing returns on my box.
+(define *cache-size* 512)
+
+;; Gets the left-hand depth of a list.
+(define (depth lst)
+  (if (or (not (list? lst)) (null? lst))
+      0
+      (+ 1 (depth (car lst)))))
+
+(eval-when (compile load eval)
+(define (syntax-for-non-cache-case for-syntax matchf-syn accumsym s-syn)
+;  (let ((matchf-syn (datum->syntax for-syntax matchf)))
+   #`(lambda (str strlen at)
+      (let ((res (#,matchf-syn str strlen at)))
+        ;; Try to match the nonterminal.
+        (if res
+            ;; If we matched, do some post-processing to figure out
+            ;; what data to propagate upward.
+            (let ((at (car res))
+                  (body (cadr res)))
+              #,(cond
+                 ((eq? accumsym 'name)
+                  #`(list at '#,s-syn))
+                 ((eq? accumsym 'all)
+                  #`(list (car res)
+                          (cond
+                           ((not (list? body))
+                            (list '#,s-syn body))
+                           ((null? body) '#,s-syn)
+                           ((symbol? (car body))
+                            (list '#,s-syn body))
+                           (else (cons '#,s-syn body)))))
+                 ((eq? accumsym 'none) #`(list (car res) '()))
+                 (else #`(begin res))))
+            ;; If we didn't match, just return false.
+            #f))))
+)
+
+;; Defines a new nonterminal symbol accumulating with ACCUM.
+(define-syntax define-nonterm
+  (lambda (x)
+    (syntax-case x ()
+      ((_ sym accum pat)
+       (let ((matchf (peg-sexp-compile #'pat (syntax->datum #'accum)))
+             (accumsym (syntax->datum #'accum))
+             (c (datum->syntax x (gensym))));; the cache
+         ;; CODE is the code to parse the string if the result isn't cached.
+         (let ((syn (syntax-for-non-cache-case x matchf accumsym #'sym)))
+           #`(begin
+               (define #,c (make-vector *cache-size* #f));; the cache
+               (define (sym str strlen at)
+                 (let* ((vref (vector-ref #,c (modulo at *cache-size*))))
+                   ;; Check to see whether the value is cached.
+                   (if (and vref (eq? (car vref) str) (= (cadr vref) at))
+                       (caddr vref);; If it is return it.
+                       (let ((fres ;; Else calculate it and cache it.
+                              (#,syn str strlen at)))
+                         (vector-set! #,c (modulo at *cache-size*)
+                                      (list str at fres))
+                         fres)))))))))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; Parse string PEGs using sexp PEGs.
+;; See the variable PEG-AS-PEG for an easier-to-read syntax.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Grammar for PEGs in PEG grammar.
+(define peg-as-peg
+"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
+pattern <-- alternative (SLASH sp alternative)*
+alternative <-- ([!&]? sp suffix)+
+suffix <-- primary ([*+?] sp)*
+primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
+literal <-- ['] (!['] .)* ['] sp
+charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
+CCrange <-- . '-' .
+CCsingle <-- .
+nonterminal <-- [a-zA-Z0-9-]+ sp
+sp < [ \t\n]*
+SLASH < '/'
+LB < '['
+RB < ']'
+")
+
+(define-nonterm peg-grammar all
+  (body lit (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern) +))
+(define-nonterm peg-pattern all
+  (and peg-alternative
+       (body lit (and (ignore "/") peg-sp peg-alternative) *)))
+(define-nonterm peg-alternative all
+  (body lit (and (body lit (or "!" "&") ?) peg-sp peg-suffix) +))
+(define-nonterm peg-suffix all
+  (and peg-primary (body lit (and (or "*" "+" "?") peg-sp) *)))
+(define-nonterm peg-primary all
+  (or (and "(" peg-sp peg-pattern ")" peg-sp)
+      (and "." peg-sp)
+      peg-literal
+      peg-charclass
+      (and peg-nonterminal (body ! "<" 1))))
+(define-nonterm peg-literal all
+  (and "'" (body lit (and (body ! "'" 1) peg-any) *) "'" peg-sp))
+(define-nonterm peg-charclass all
+  (and (ignore "[")
+       (body lit (and (body ! "]" 1)
+                      (or charclass-range charclass-single)) *)
+       (ignore "]")
+       peg-sp))
+(define-nonterm charclass-range all (and peg-any "-" peg-any))
+(define-nonterm charclass-single all peg-any)
+(define-nonterm peg-nonterminal all
+  (and (body lit (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-") +) peg-sp))
+(define-nonterm peg-sp none
+  (body lit (or " " "\t" "\n") *))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; PARSE STRING PEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Pakes a string representing a PEG grammar and defines all the nonterminals in
+;; it as the associated PEGs.
+(define (peg-parser str for-syntax)
+  (let ((parsed (peg-parse peg-grammar str)))
+    (if (not parsed)
+        (begin
+          ;; (display "Invalid PEG grammar!\n")
+          #f)
+        (let ((lst (peg:tree parsed)))
+          (cond
+           ((or (not (list? lst)) (null? lst))
+            lst)
+           ((eq? (car lst) 'peg-grammar)
+            #`(begin
+                #,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
+                        (context-flatten (lambda (lst) (<= (depth lst) 2))
+                                         (cdr lst))))))))))
+
+;; Macro wrapper for PEG-PARSER.  Parses PEG grammars expressed as strings and
+;; defines all the appropriate nonterminals.
+(define-syntax define-grammar
+  (lambda (x)
+    (syntax-case x ()
+      ((_ str)
+       (peg-parser (syntax->datum #'str) x)))))
+(define define-grammar-f peg-parser)
+
+;; Parse a nonterminal and pattern listed in LST.
+(define (peg-nonterm->defn lst for-syntax)
+  (let* ((nonterm (car lst))
+         (grabber (cadr lst))
+         (pattern (caddr lst))
+         (nonterm-name (datum->syntax for-syntax
+                                      (string->symbol (cadr nonterm)))))
+    #`(define-nonterm #,nonterm-name
+       #,(cond
+          ((string=? grabber "<--") (datum->syntax for-syntax 'all))
+          ((string=? grabber "<-") (datum->syntax for-syntax 'body))
+          (else (datum->syntax for-syntax 'none)))
+       #,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
+
+;; Parse a pattern.
+(define (peg-pattern->defn lst for-syntax)
+  #`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
+                (context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
+                                 (cdr lst)))))
+
+;; Parse an alternative.
+(define (peg-alternative->defn lst for-syntax)
+  #`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
+                 (context-flatten (lambda (x) (or (string? (car x))
+                                             (eq? (car x) 'peg-suffix)))
+                                  (cdr lst)))))
+
+;; Parse a body.
+(define (peg-body->defn lst for-syntax)
+  (let ((suffix '())
+        (front (datum->syntax for-syntax 'lit)))
+    (cond
+     ((eq? (car lst) 'peg-suffix)
+      (set! suffix lst))
+     ((string? (car lst))
+      (begin (set! front (datum->syntax for-syntax
+                                        (string->symbol (car lst))))
+             (set! suffix (cadr lst))))
+     (else `(peg-parse-body-fail ,lst)))
+    #`(body #,front #,@(peg-suffix->defn
+                        suffix
+                        for-syntax))))
+
+;; Parse a suffix.
+(define (peg-suffix->defn lst for-syntax)
+  #`(#,(peg-primary->defn (cadr lst) for-syntax)
+     #,(if (null? (cddr lst))
+           1
+           (datum->syntax for-syntax (string->symbol (caddr lst))))))
+
+;; Parse a primary.
+(define (peg-primary->defn lst for-syntax)
+  (let ((el (cadr lst)))
+  (cond
+   ((list? el)
+    (cond
+     ((eq? (car el) 'peg-literal)
+      (peg-literal->defn el for-syntax))
+     ((eq? (car el) 'peg-charclass)
+      (peg-charclass->defn el for-syntax))
+     ((eq? (car el) 'peg-nonterminal)
+      (datum->syntax for-syntax (string->symbol (cadr el))))))
+   ((string? el)
+    (cond
+     ((equal? el "(")
+      (peg-pattern->defn (caddr lst) for-syntax))
+     ((equal? el ".")
+      (datum->syntax for-syntax 'peg-any))
+     (else (datum->syntax for-syntax
+                          `(peg-parse-any unknown-string ,lst)))))
+   (else (datum->syntax for-syntax
+                        `(peg-parse-any unknown-el ,lst))))))
+
+;; Trims characters off the front and end of STR.
+;; (trim-1chars "'ab'") -> "ab"
+(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
+
+;; Parses a literal.
+(define (peg-literal->defn lst for-syntax)
+  (datum->syntax for-syntax (trim-1chars (cadr lst))))
+
+;; Parses a charclass.
+(define (peg-charclass->defn lst for-syntax)
+  #`(or
+     #,@(map
+         (lambda (cc)
+           (cond
+            ((eq? (car cc) 'charclass-range)
+             #`(range #,(datum->syntax
+                         for-syntax
+                         (string-ref (cadr cc) 0))
+                      #,(datum->syntax
+                         for-syntax
+                         (string-ref (cadr cc) 2))))
+            ((eq? (car cc) 'charclass-single)
+             (datum->syntax for-syntax (cadr cc)))))
+         (context-flatten
+          (lambda (x) (or (eq? (car x) 'charclass-range)
+                          (eq? (car x) 'charclass-single)))
+          (cdr lst)))))
+
+;; Compresses a list to save the optimizer work.
+;; e.g. (or (and a)) -> a
+(define (compressor-core lst)
+  (if (or (not (list? lst)) (null? lst))
+      lst
+      (cond
+       ((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
+             (null? (cddr lst)))
+        (compressor-core (cadr lst)))
+       ((and (eq? (car lst) 'body)
+             (eq? (cadr lst) 'lit)
+             (eq? (cadddr lst) 1))
+        (compressor-core (caddr lst)))
+       (else (map compressor-core lst)))))
+
+(define (compressor syn for-syntax)
+  (datum->syntax for-syntax
+                 (compressor-core (syntax->datum syn))))
+
+;; Builds a lambda-expressions for the pattern STR using accum.
+(define (peg-string-compile str-stx accum)
+  (peg-sexp-compile
+   (compressor
+    (peg-pattern->defn
+     (peg:tree (peg-parse peg-pattern (syntax->datum str-stx))) str-stx)
+    str-stx)
+   accum))
-- 
1.7.4.1


[-- Attachment #6: 0005-Factor-PEG-Functions.patch --]
[-- Type: application/octet-stream, Size: 8366 bytes --]

From 58d7f11462917061349e023c84293f1ff065753f Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sun, 6 Mar 2011 00:02:27 -0500
Subject: [PATCH 5/7] Factor PEG Functions

 * module/ice-9/peg.scm: take out the functions for simplifying trees
 * module/ice-9/peg/simplify-tree.scm: and put them here
---
 module/ice-9/peg.scm               |   87 ++------------------------------
 module/ice-9/peg/simplify-tree.scm |   97 ++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 81 deletions(-)
 create mode 100644 module/ice-9/peg/simplify-tree.scm

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index b96104a..cb79c60 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -18,8 +18,7 @@
 ;;;;
 
 (define-module (ice-9 peg)
-  #:export (context-flatten
-            peg-parse
+  #:export (peg-parse
 ;            define-nonterm
 ;            define-nonterm-f
             peg-match
@@ -28,16 +27,17 @@
             peg:string
             peg:tree
             peg:substring
-            peg-record?
-            keyword-flatten)
+            peg-record?)
 ;  #:export-syntax (define-nonterm)
   #:use-module (ice-9 peg codegen)
   #:use-module (ice-9 peg string-peg)
+  #:use-module (ice-9 peg simplify-tree)
   #:re-export (peg-sexp-compile
                define-grammar
                define-grammar-f
-               define-nonterm)
-  #:use-module (system base pmatch))
+               define-nonterm
+               keyword-flatten
+               context-flatten))
 
 ;;;
 ;;; Helper Macros
@@ -52,14 +52,6 @@ execute the STMTs and try again."
        (or test
            (begin stmt stmt* ... (lp)))))))
 
-(define-syntax single?
-  (syntax-rules ()
-    "Return #t if X is a list of one element."
-    ((_ x)
-     (pmatch x
-       ((_) #t)
-       (else #f)))))
-
 (eval-when (compile load eval)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -105,73 +97,6 @@ execute the STMTs and try again."
                         (string-collapse match))))))))))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Is everything in LST true?
-(define (andlst lst)
-  (or (null? lst)
-      (and (car lst) (andlst (cdr lst)))))
-
-;; Is LST a list of strings?
-(define (string-list? lst)
-  (and (list? lst) (not (null? lst))
-       (andlst (map string? lst))))
-
-;; Groups all strings that are next to each other in LST.  Used in
-;; STRING-COLLAPSE.
-(define (string-group lst)
-  (if (not (list? lst))
-      lst
-      (if (null? lst)
-          '()
-          (let ((next (string-group (cdr lst))))
-            (if (not (string? (car lst)))
-                (cons (car lst) next)
-                (if (and (not (null? next))
-                         (list? (car next))
-                         (string? (caar next)))
-                    (cons (cons (car lst) (car next)) (cdr next))
-                    (cons (list (car lst)) next)))))))
-
-
-;; Collapses all the string in LST.
-;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
-(define (string-collapse lst)
-  (if (list? lst)
-      (let ((res (map (lambda (x) (if (string-list? x)
-                                      (apply string-append x)
-                                      x))
-                      (string-group (map string-collapse lst)))))
-        (if (single? res) (car res) res))
-      lst))
-
-;; If LST is an atom, return (list LST), else return LST.
-(define (mklst lst)
-  (if (not (list? lst)) (list lst) lst))
-
-;; Takes a list and "flattens" it, using the predicate TST to know when to stop
-;; instead of terminating on atoms (see tutorial).
-(define (context-flatten tst lst)
-  (if (or (not (list? lst)) (null? lst))
-      lst
-      (if (tst lst)
-          (list lst)
-          (apply append
-                 (map (lambda (x) (mklst (context-flatten tst x)))
-                      lst)))))
-
-;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
-;; know when to stop at (see tutorial).
-(define (keyword-flatten keyword-lst lst)
-  (context-flatten
-   (lambda (x)
-     (if (or (not (list? x)) (null? x))
-         #t
-         (member (car x) keyword-lst)))
-   lst))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;; PMATCH STRUCTURE MUNGING
 ;; Pretty self-explanatory.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/module/ice-9/peg/simplify-tree.scm b/module/ice-9/peg/simplify-tree.scm
new file mode 100644
index 0000000..4c781a1
--- /dev/null
+++ b/module/ice-9/peg/simplify-tree.scm
@@ -0,0 +1,97 @@
+;;;; simplify-tree.scm --- utility functions for the PEG parser
+;;;;
+;;;; 	Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg simplify-tree)
+  #:export (keyword-flatten context-flatten string-collapse)
+  #:use-module (system base pmatch))
+
+(define-syntax single?
+  (syntax-rules ()
+    "Return #t if X is a list of one element."
+    ((_ x)
+     (pmatch x
+       ((_) #t)
+       (else #f)))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Is everything in LST true?
+(define (andlst lst)
+  (or (null? lst)
+      (and (car lst) (andlst (cdr lst)))))
+
+;; Is LST a list of strings?
+(define (string-list? lst)
+  (and (list? lst) (not (null? lst))
+       (andlst (map string? lst))))
+
+;; Groups all strings that are next to each other in LST.  Used in
+;; STRING-COLLAPSE.
+(define (string-group lst)
+  (if (not (list? lst))
+      lst
+      (if (null? lst)
+          '()
+          (let ((next (string-group (cdr lst))))
+            (if (not (string? (car lst)))
+                (cons (car lst) next)
+                (if (and (not (null? next))
+                         (list? (car next))
+                         (string? (caar next)))
+                    (cons (cons (car lst) (car next)) (cdr next))
+                    (cons (list (car lst)) next)))))))
+
+
+;; Collapses all the string in LST.
+;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
+(define (string-collapse lst)
+  (if (list? lst)
+      (let ((res (map (lambda (x) (if (string-list? x)
+                                      (apply string-append x)
+                                      x))
+                      (string-group (map string-collapse lst)))))
+        (if (single? res) (car res) res))
+      lst))
+
+;; If LST is an atom, return (list LST), else return LST.
+(define (mklst lst)
+  (if (not (list? lst)) (list lst) lst))
+
+;; Takes a list and "flattens" it, using the predicate TST to know when to stop
+;; instead of terminating on atoms (see tutorial).
+(define (context-flatten tst lst)
+  (if (or (not (list? lst)) (null? lst))
+      lst
+      (if (tst lst)
+          (list lst)
+          (apply append
+                 (map (lambda (x) (mklst (context-flatten tst x)))
+                      lst)))))
+
+;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
+;; know when to stop at (see tutorial).
+(define (keyword-flatten keyword-lst lst)
+  (context-flatten
+   (lambda (x)
+     (if (or (not (list? x)) (null? x))
+         #t
+         (member (car x) keyword-lst)))
+   lst))
-- 
1.7.4.1


[-- Attachment #7: 0006-Factor-PEG-Structure.patch --]
[-- Type: application/octet-stream, Size: 4687 bytes --]

From e5b7b22b9e916b395e6070c0ee52b75ac237fc2f Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sun, 6 Mar 2011 00:09:16 -0500
Subject: [PATCH 6/7] Factor PEG Structure

 * modules/ice-9/peg.scm: remove the part that defines a match structure
 * modules/ice-9/peg/match-record.scm: and put it here
---
 module/ice-9/peg.scm              |   38 +++++++-------------------------
 module/ice-9/peg/match-record.scm |   43 +++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 29 deletions(-)
 create mode 100644 module/ice-9/peg/match-record.scm

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index cb79c60..4269f9b 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -21,23 +21,24 @@
   #:export (peg-parse
 ;            define-nonterm
 ;            define-nonterm-f
-            peg-match
-            peg:start
-            peg:end
-            peg:string
-            peg:tree
-            peg:substring
-            peg-record?)
+            peg-match)
 ;  #:export-syntax (define-nonterm)
   #:use-module (ice-9 peg codegen)
   #:use-module (ice-9 peg string-peg)
   #:use-module (ice-9 peg simplify-tree)
+  #:use-module (ice-9 peg match-record)
   #:re-export (peg-sexp-compile
                define-grammar
                define-grammar-f
                define-nonterm
                keyword-flatten
-               context-flatten))
+               context-flatten
+               peg:start
+               peg:end
+               peg:string
+               peg:tree
+               peg:substring
+               peg-record?))
 
 ;;;
 ;;; Helper Macros
@@ -96,26 +97,5 @@ execute the STMTs and try again."
                         at end string
                         (string-collapse match))))))))))))
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;; PMATCH STRUCTURE MUNGING
-;; Pretty self-explanatory.
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define prec
-  (make-record-type "peg" '(start end string tree)))
-(define make-prec
-  (record-constructor prec '(start end string tree)))
-(define (peg:start pm)
-  (if pm ((record-accessor prec 'start) pm) #f))
-(define (peg:end pm)
-  (if pm ((record-accessor prec 'end) pm) #f))
-(define (peg:string pm)
-  (if pm ((record-accessor prec 'string) pm) #f))
-(define (peg:tree pm)
-  (if pm ((record-accessor prec 'tree) pm) #f))
-(define (peg:substring pm)
-  (if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
-(define peg-record? (record-predicate prec))
-
 )
 
diff --git a/module/ice-9/peg/match-record.scm b/module/ice-9/peg/match-record.scm
new file mode 100644
index 0000000..87785a5
--- /dev/null
+++ b/module/ice-9/peg/match-record.scm
@@ -0,0 +1,43 @@
+;;;; match-record.scm --- records to hold PEG parser results
+;;;;
+;;;; 	Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;;;;
+
+(define-module (ice-9 peg match-record)
+  #:export (prec make-prec peg:start peg:end peg:string
+            peg:tree peg:substring peg-record?))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;; PMATCH STRUCTURE MUNGING
+;; Pretty self-explanatory.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define prec
+  (make-record-type "peg" '(start end string tree)))
+(define make-prec
+  (record-constructor prec '(start end string tree)))
+(define (peg:start pm)
+  (if pm ((record-accessor prec 'start) pm) #f))
+(define (peg:end pm)
+  (if pm ((record-accessor prec 'end) pm) #f))
+(define (peg:string pm)
+  (if pm ((record-accessor prec 'string) pm) #f))
+(define (peg:tree pm)
+  (if pm ((record-accessor prec 'tree) pm) #f))
+(define (peg:substring pm)
+  (if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
+(define peg-record? (record-predicate prec))
-- 
1.7.4.1


[-- Attachment #8: 0007-Remove-eval-when.patch --]
[-- Type: application/octet-stream, Size: 1039 bytes --]

From d3394d374c6d7124ae3c25212340772285d9290a Mon Sep 17 00:00:00 2001
From: Noah Lavine <nlavine@haverford.edu>
Date: Sun, 6 Mar 2011 00:12:37 -0500
Subject: [PATCH 7/7] Remove eval-when

 * module/ice-9/peg.scm: remove the eval-when statement
---
 module/ice-9/peg.scm |    5 -----
 1 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/module/ice-9/peg.scm b/module/ice-9/peg.scm
index 4269f9b..644af6d 100644
--- a/module/ice-9/peg.scm
+++ b/module/ice-9/peg.scm
@@ -53,8 +53,6 @@ execute the STMTs and try again."
        (or test
            (begin stmt stmt* ... (lp)))))))
 
-(eval-when (compile load eval)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;;; FOR DEFINING AND USING NONTERMINALS
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -96,6 +94,3 @@ execute the STMTs and try again."
                        (make-prec
                         at end string
                         (string-collapse match))))))))))))
-
-)
-
-- 
1.7.4.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2011-03-31 21:48 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-06  5:25 PEG Patches Noah Lavine
2011-03-07  1:28 ` Noah Lavine
2011-03-25 18:06 ` Andy Wingo
2011-03-28 20:44   ` Noah Lavine
2011-03-28 20:46     ` Noah Lavine
2011-03-28 22:17     ` Michael Lucy
2011-03-29  8:00     ` Andy Wingo
2011-03-29 12:47       ` Noah Lavine
2011-03-29 13:20         ` Andy Wingo
2011-03-31 21:48           ` Noah Lavine

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).