From: Thien-Thi Nguyen <ttn@gnuvola.org>
To: Andy Wingo <wingo@pobox.com>
Cc: guile-user@gnu.org
Subject: Re: Guile base64
Date: Tue, 28 Sep 2010 14:48:36 +0200 [thread overview]
Message-ID: <87tylayqxn.fsf@ambire.localdomain> (raw)
In-Reply-To: <m3mxrvkxak.fsf@unquote.localdomain> (Andy Wingo's message of "Mon, 06 Sep 2010 11:59:15 +0200")
[-- Attachment #1: Type: text/plain, Size: 1169 bytes --]
() Andy Wingo <wingo@pobox.com>
() Mon, 06 Sep 2010 11:59:15 +0200
Perhaps we could combine interfaces -- the stream-based (I presume; I
haven't seen your code yet, but I know your Scheme code looks good)
interfaces from yours, and the map-3-to-4 stuff from Andreas' R6RS port
of my base64.scm.
I say this because the r6rs port already uses bytevectors, and compiles
down to fairly tight VM code. I haven't run benchmarks though.
Please find below a five-minute port of (ice-9 base64) to Guile 1.9.x.
To play, write it to /tmp and try something like:
$ cd /tmp
$ cat > hack <<EOF
(use-modules (ice-9 base64))
(base64-encode #t (open-input-file "/tmp/hack") 44)
(newline)
EOF
$ base64 -w 44 hack > A
$ guile -l base64.scm -s hack | diff A -
Probably the next step is to settle the interface. I am biased towards
1.4.x compatability but am open (albeit reluctantly) to breaking it.
Once the interface is settled, i'll add docs and some perfunctory tests
and repost. WRT performance -- feel free to rewrite things afterwards;
i'm not yet up to speed (har har) on this area of Guile development
enough to help more than hinder.
[-- Attachment #2: base64.scm --]
[-- Type: application/octet-stream, Size: 8942 bytes --]
;;; base64.scm --- base64 encode/decode
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Copyright (C) 2004, 2005, 2007 Thien-Thi Nguyen
;;
;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.
;;
;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Lesser General Public License for more details.
;;
;; You should have received a copy of the GNU Lesser General Public
;; License along with this library; if not, write to the Free Software
;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Commentary:
;; The base64 encoding (rfc 2045) is basically a 3-byte to
;; 4-byte transform:
;;
;; GGGGGGGG NNNNNNNN UUUUUUUU <=> GGGGGG GGNNNN NNNNUU UUUUUU
;; GGGGGGRR RRRROOOO OOKKKKKK <=> GGGGGG RRRRRR OOOOOO KKKKKK
;;
;; modulo line breaks and terminating delimiters. It is widely used
;; in MIME and other protocols where the transmission medium may not
;; be guaranteed 8-bit clean.
;;; Code:
(define-module (ice-9 base64)
#:export (base64-encode base64-decode)
#:use-module ((rnrs io ports) #:select (get-u8)))
;;; support
(define (getb!-proc port)
(lambda ()
(let ((b (get-u8 port)))
(and (not (eof-object? b)) b))))
(define (analyze-input input) ; => (getb! . exp-input)
(cond ((string? input)
(cons (getb!-proc (open-input-string input))
(string-length input)))
((port? input)
(cons (getb!-proc input)
(and (port-filename input)
(let ((guess (false-if-exception
(stat:size (stat input)))))
(and (number? guess)
(positive? guess)
guess)))))
(else #f)))
(define c2i char->integer)
(define i2c integer->char)
(define (char+ c n)
(i2c (+ (c2i c) n)))
(define *reasonable-chunk* (ash 1 (ash 1 (ash 1 (ash 1 (ash 1 0)))))) ; 64 KiB
(define subs substring)
(define (make-buffer-stack size)
(let ((cur #f) (stack '()) (idx size))
(define (new!)
(set! cur (make-string size))
(set! stack (cons cur stack))
(set! idx 0))
(define (spill!)
(or (= idx size) (set-car! stack (subs (car stack) 0 idx)))
(let ((res (apply string-append (reverse! stack))))
(set! cur #f)
(set! stack '())
(set! idx size)
res))
;; rv
(lambda (c)
(cond (c (and (= idx size) (new!))
(string-set! cur idx c)
(set! idx (1+ idx)))
(else (spill!))))))
(define (make-outp-buffer outp size)
(let ((buf (make-string size)) (idx 0) (count 0))
(define (>OUT flush? x) ;;; todo: make non-blocking if (not flush?)
(display x outp)
(and flush? (force-output outp)))
(define (new!)
(>OUT #f buf)
(set! idx 0))
(define (spill!)
(>OUT #t (if (= idx size) buf (subs buf 0 idx)))
(set! idx 0)
(let ((rv count))
(set! count 0)
rv))
;; rv
(lambda (c)
(cond (c (and (= idx size) (new!))
(string-set! buf idx c)
(set! idx (1+ idx))
(set! count (1+ count)))
(else (spill!))))))
(define (make-ob! output size) ; output bufferer
(if output
(make-outp-buffer (if (eq? #t output)
(current-output-port)
output)
(min *reasonable-chunk* size))
(make-buffer-stack size)))
;;; encoding
(define *enc-map*
(let ((em (make-vector 64 #f)))
(vector-set! em 62 #\+)
(vector-set! em 63 #\/)
(do ((i 0 (1+ i)))
((= i 26) em) ; rv
(and (< i 10)
(vector-set! em (+ 52 i) (char+ #\0 i)))
(vector-set! em (+ 0 i) (char+ #\A i))
(vector-set! em (+ 26 i) (char+ #\a i)))))
(define (encode! output getb! line-break crlf exp-input)
(let* ((bgrp (and line-break (quotient (max 4 line-break) 4)))
(ob! (make-ob! output
(if exp-input
;; compute exact result size
(let ((raw (* 4 (inexact->exact
(ceiling (/ (1- exp-input)
3))))))
(+ raw (if bgrp
(* (quotient raw (* 4 bgrp))
(if crlf 2 1))
0)))
*reasonable-chunk*)))
(lb! (if crlf
(lambda () (ob! #\cr) (ob! #\newline))
(lambda () (ob! #\newline)))))
(define (acc! x)
(ob! (if x (vector-ref *enc-map* x) #\=)))
(let loop ((group bgrp))
(let* ((g (getb!)) ; do not use `let' here
(n (getb!))
(u (getb!)))
(cond (g
(and bgrp (zero? group) (begin (lb!) (set! group bgrp)))
(acc! (ash g -2))
(acc! (and (or g n)
(logior (ash (logand (or g 0) 3) 4)
(logand (ash (or n 0) -4) 15))))
(acc! (and (or n u)
(logior (ash (logand (or n 0) 15) 2)
(ash (or u 0) -6))))
(acc! (and u (logand u 63)))))
(if (and g n u)
(loop (and bgrp (1- group)))
(ob! #f))))))
;; Write to @var{out-port} the result of base64-encoding @var{input} and
;; return the number of bytes written. If @var{out-port} is #t, send to
;; the current output port. If @var{out-port} is #f, return the
;; result as a string, instead. @var{input} may be a string or a port.
;;
;; Optional third arg @var{line-break} specifies the maximum number of columns
;; to appear in the result before a line break. Actual number of columns is a
;; rounded-down multiple of four, but not less than four. The result never
;; ends with a line break. #f means omit line breaks entirely.
;;
;; Optional fourth arg @var{crlf?} non-#f means use @sc{crlf} for line breaks
;; instead of simply @sc{lf}.
;;
;;-sig: (out-port input [line-break [crlf?]])
;;
(define (base64-encode out-port input . opts)
(or (and=> (analyze-input input)
(lambda (pair) ; (getb! . exp-input)
(encode! out-port
(car pair)
(and (not (null? opts))
(car opts))
(and (not (null? opts))
(not (null? (cdr opts)))
(cadr opts))
(cdr pair))))
(error "bad input:" input)))
;;; decoding
(define *dec-map*
(let ((dm (make-vector 256 #f)))
(do ((i 0 (1+ i)))
((= i 64) dm) ; rv
(vector-set! dm (c2i (vector-ref *enc-map* i)) i))))
(define byte-whitespace?
(let ((ws-bytes (map c2i '(#\space #\np #\nl #\cr #\ht #\vt))))
(lambda (b) (memq b ws-bytes))))
(define (decode! output getb! exp-input)
(let ((ob! (make-ob!
output
(if exp-input
;; approximate result size (whitespace not known a priori)
(inexact->exact (ceiling (* 3 (/ (1+ exp-input) 4))))
*reasonable-chunk*))))
(define (acc! x)
(ob! (i2c x)))
(define (find!)
(let ((b (getb!)))
(and b (if (byte-whitespace? b)
(find!)
(vector-ref *dec-map* b)))))
;; do it!
(let loop ()
(let* ((g (find!)) ; do not use `let' here
(r (find!))
(o (find!))
(k (find!)))
(and g r (acc! (logior (ash g 2) (ash r -4))))
(and r o (acc! (logior (ash (logand r 15) 4) (ash o -2))))
(and o k (acc! (logior (ash (logand o 3) 6) k)))
(if (and g r o k)
(loop)
(ob! #f))))))
;; Write to @var{out-port} the result of base64-decoding @var{input} and
;; return the number of bytes written. If @var{out-port} is #t, send to
;; the current output port. If @var{out-port} is #f, return the
;; result as a string, instead. @var{input} may be a string or a port.
;;
(define (base64-decode out-port input)
(or (and=> (analyze-input input)
(lambda (pair) ; (getb! . exp-input)
(decode! out-port
(car pair)
(cdr pair))))
(error "bad input:" input)))
;;; base64.scm ends here
prev parent reply other threads:[~2010-09-28 12:48 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-01 18:10 Guile base64 Romel Sandoval
2010-09-02 8:12 ` Ludovic Courtès
2010-09-02 18:49 ` Andy Wingo
2010-09-02 23:34 ` Andreas Rottmann
2010-09-03 20:15 ` Romel Sandoval
2010-09-05 21:58 ` Thien-Thi Nguyen
2010-09-05 22:47 ` Ludovic Courtès
2010-09-06 9:39 ` Thien-Thi Nguyen
2010-09-06 9:59 ` Andy Wingo
2010-09-28 12:48 ` Thien-Thi Nguyen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://www.gnu.org/software/guile/
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87tylayqxn.fsf@ambire.localdomain \
--to=ttn@gnuvola.org \
--cc=guile-user@gnu.org \
--cc=wingo@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).