unofficial mirror of guile-user@gnu.org 
 help / color / mirror / Atom feed
From: Thien-Thi Nguyen <ttn@gnuvola.org>
To: Andy Wingo <wingo@pobox.com>
Cc: guile-user@gnu.org
Subject: Re: Guile base64
Date: Tue, 28 Sep 2010 14:48:36 +0200	[thread overview]
Message-ID: <87tylayqxn.fsf@ambire.localdomain> (raw)
In-Reply-To: <m3mxrvkxak.fsf@unquote.localdomain> (Andy Wingo's message of "Mon, 06 Sep 2010 11:59:15 +0200")

[-- Attachment #1: Type: text/plain, Size: 1169 bytes --]

() Andy Wingo <wingo@pobox.com>
() Mon, 06 Sep 2010 11:59:15 +0200

   Perhaps we could combine interfaces -- the stream-based (I presume; I
   haven't seen your code yet, but I know your Scheme code looks good)
   interfaces from yours, and the map-3-to-4 stuff from Andreas' R6RS port
   of my base64.scm.

   I say this because the r6rs port already uses bytevectors, and compiles
   down to fairly tight VM code. I haven't run benchmarks though.

Please find below a five-minute port of (ice-9 base64) to Guile 1.9.x.
To play, write it to /tmp and try something like:

  $ cd /tmp
  $ cat > hack <<EOF
  (use-modules (ice-9 base64))
  (base64-encode #t (open-input-file "/tmp/hack") 44)
  (newline)
  EOF
  
  $ base64 -w 44 hack > A
  $ guile -l base64.scm -s hack | diff A -

Probably the next step is to settle the interface.  I am biased towards
1.4.x compatability but am open (albeit reluctantly) to breaking it.
Once the interface is settled, i'll add docs and some perfunctory tests
and repost.  WRT performance -- feel free to rewrite things afterwards;
i'm not yet up to speed (har har) on this area of Guile development
enough to help more than hinder.


[-- Attachment #2: base64.scm --]
[-- Type: application/octet-stream, Size: 8942 bytes --]

;;; base64.scm --- base64 encode/decode

;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Copyright (C) 2004, 2005, 2007 Thien-Thi Nguyen
;;
;; This library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public
;; License as published by the Free Software Foundation; either
;; version 3 of the License, or (at your option) any later version.
;;
;; This library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.
;;
;; You should have received a copy of the GNU Lesser General Public
;; License along with this library; if not, write to the Free Software
;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

;;; Commentary:

;; The base64 encoding (rfc 2045) is basically a 3-byte to
;; 4-byte transform:
;;
;; GGGGGGGG NNNNNNNN UUUUUUUU <=> GGGGGG GGNNNN NNNNUU UUUUUU
;; GGGGGGRR RRRROOOO OOKKKKKK <=> GGGGGG RRRRRR OOOOOO KKKKKK
;;
;; modulo line breaks and terminating delimiters.  It is widely used
;; in MIME and other protocols where the transmission medium may not
;; be guaranteed 8-bit clean.

;;; Code:

(define-module (ice-9 base64)
  #:export (base64-encode base64-decode)
  #:use-module ((rnrs io ports) #:select (get-u8)))

;;; support

(define (getb!-proc port)
  (lambda ()
    (let ((b (get-u8 port)))
      (and (not (eof-object? b)) b))))

(define (analyze-input input)           ; => (getb! . exp-input)
  (cond ((string? input)
         (cons (getb!-proc (open-input-string input))
               (string-length input)))
        ((port? input)
         (cons (getb!-proc input)
               (and (port-filename input)
                    (let ((guess (false-if-exception
                                  (stat:size (stat input)))))
                      (and (number? guess)
                           (positive? guess)
                           guess)))))
        (else #f)))

(define c2i char->integer)
(define i2c integer->char)

(define (char+ c n)
  (i2c (+ (c2i c) n)))

(define *reasonable-chunk* (ash 1 (ash 1 (ash 1 (ash 1 (ash 1 0)))))) ; 64 KiB

(define subs substring)

(define (make-buffer-stack size)
  (let ((cur #f) (stack '()) (idx size))

    (define (new!)
      (set! cur (make-string size))
      (set! stack (cons cur stack))
      (set! idx 0))

    (define (spill!)
      (or (= idx size) (set-car! stack (subs (car stack) 0 idx)))
      (let ((res (apply string-append (reverse! stack))))
        (set! cur #f)
        (set! stack '())
        (set! idx size)
        res))

    ;; rv
    (lambda (c)
      (cond (c (and (= idx size) (new!))
               (string-set! cur idx c)
               (set! idx (1+ idx)))
            (else (spill!))))))

(define (make-outp-buffer outp size)
  (let ((buf (make-string size)) (idx 0) (count 0))

    (define (>OUT flush? x)    ;;; todo: make non-blocking if (not flush?)
      (display x outp)
      (and flush? (force-output outp)))

    (define (new!)
      (>OUT #f buf)
      (set! idx 0))

    (define (spill!)
      (>OUT #t (if (= idx size) buf (subs buf 0 idx)))
      (set! idx 0)
      (let ((rv count))
        (set! count 0)
        rv))

    ;; rv
    (lambda (c)
      (cond (c (and (= idx size) (new!))
               (string-set! buf idx c)
               (set! idx (1+ idx))
               (set! count (1+ count)))
            (else (spill!))))))

(define (make-ob! output size)          ; output bufferer
  (if output
      (make-outp-buffer (if (eq? #t output)
                            (current-output-port)
                            output)
                        (min *reasonable-chunk* size))
      (make-buffer-stack size)))

;;; encoding

(define *enc-map*
  (let ((em (make-vector 64 #f)))
    (vector-set! em 62 #\+)
    (vector-set! em 63 #\/)
    (do ((i 0 (1+ i)))
        ((= i 26) em)                   ; rv
      (and (< i 10)
           (vector-set! em (+ 52 i) (char+ #\0 i)))
      (vector-set! em (+ 0 i) (char+ #\A i))
      (vector-set! em (+ 26 i) (char+ #\a i)))))

(define (encode! output getb! line-break crlf exp-input)
  (let* ((bgrp (and line-break (quotient (max 4 line-break) 4)))
         (ob! (make-ob! output
                        (if exp-input
                            ;; compute exact result size
                            (let ((raw (* 4 (inexact->exact
                                             (ceiling (/ (1- exp-input)
                                                         3))))))
                              (+ raw (if bgrp
                                         (* (quotient raw (* 4 bgrp))
                                            (if crlf 2 1))
                                         0)))
                            *reasonable-chunk*)))
         (lb! (if crlf
                  (lambda () (ob! #\cr) (ob! #\newline))
                  (lambda ()            (ob! #\newline)))))

    (define (acc! x)
      (ob! (if x (vector-ref *enc-map* x) #\=)))

    (let loop ((group bgrp))
      (let* ((g (getb!))                ; do not use `let' here
             (n (getb!))
             (u (getb!)))
        (cond (g
               (and bgrp (zero? group) (begin (lb!) (set! group bgrp)))
               (acc! (ash g -2))
               (acc! (and (or g n)
                          (logior (ash (logand (or g 0) 3) 4)
                                  (logand (ash (or n 0) -4) 15))))
               (acc! (and (or n u)
                          (logior (ash (logand (or n 0) 15) 2)
                                  (ash (or u 0) -6))))
               (acc! (and u (logand u 63)))))
        (if (and g n u)
            (loop (and bgrp (1- group)))
            (ob! #f))))))

;; Write to @var{out-port} the result of base64-encoding @var{input} and
;; return the number of bytes written.  If @var{out-port} is #t, send to
;; the current output port.  If @var{out-port} is #f, return the
;; result as a string, instead.  @var{input} may be a string or a port.
;;
;; Optional third arg @var{line-break} specifies the maximum number of columns
;; to appear in the result before a line break.  Actual number of columns is a
;; rounded-down multiple of four, but not less than four.  The result never
;; ends with a line break.  #f means omit line breaks entirely.
;;
;; Optional fourth arg @var{crlf?} non-#f means use @sc{crlf} for line breaks
;; instead of simply @sc{lf}.
;;
;;-sig: (out-port input [line-break [crlf?]])
;;
(define (base64-encode out-port input . opts)
  (or (and=> (analyze-input input)
             (lambda (pair)             ; (getb! . exp-input)
               (encode! out-port
                        (car pair)
                        (and (not (null? opts))
                             (car opts))
                        (and (not (null? opts))
                             (not (null? (cdr opts)))
                             (cadr opts))
                        (cdr pair))))
      (error "bad input:" input)))

;;; decoding

(define *dec-map*
  (let ((dm (make-vector 256 #f)))
    (do ((i 0 (1+ i)))
        ((= i 64) dm)                   ; rv
      (vector-set! dm (c2i (vector-ref *enc-map* i)) i))))

(define byte-whitespace?
  (let ((ws-bytes (map c2i '(#\space #\np #\nl #\cr #\ht #\vt))))
    (lambda (b) (memq b ws-bytes))))

(define (decode! output getb! exp-input)
  (let ((ob! (make-ob!
              output
              (if exp-input
                  ;; approximate result size (whitespace not known a priori)
                  (inexact->exact (ceiling (* 3 (/ (1+ exp-input) 4))))
                  *reasonable-chunk*))))

    (define (acc! x)
      (ob! (i2c x)))

    (define (find!)
      (let ((b (getb!)))
        (and b (if (byte-whitespace? b)
                   (find!)
                   (vector-ref *dec-map* b)))))

    ;; do it!
    (let loop ()
      (let* ((g (find!))                ; do not use `let' here
             (r (find!))
             (o (find!))
             (k (find!)))
        (and g r (acc! (logior (ash g 2) (ash r -4))))
        (and r o (acc! (logior (ash (logand r 15) 4) (ash o -2))))
        (and o k (acc! (logior (ash (logand o 3) 6) k)))
        (if (and g r o k)
            (loop)
            (ob! #f))))))

;; Write to @var{out-port} the result of base64-decoding @var{input} and
;; return the number of bytes written.  If @var{out-port} is #t, send to
;; the current output port.  If @var{out-port} is #f, return the
;; result as a string, instead.  @var{input} may be a string or a port.
;;
(define (base64-decode out-port input)
  (or (and=> (analyze-input input)
             (lambda (pair)             ; (getb! . exp-input)
               (decode! out-port
                        (car pair)
                        (cdr pair))))
      (error "bad input:" input)))

;;; base64.scm ends here

      reply	other threads:[~2010-09-28 12:48 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-01 18:10 Guile base64 Romel Sandoval
2010-09-02  8:12 ` Ludovic Courtès
2010-09-02 18:49   ` Andy Wingo
2010-09-02 23:34     ` Andreas Rottmann
2010-09-03 20:15       ` Romel Sandoval
2010-09-05 21:58         ` Thien-Thi Nguyen
2010-09-05 22:47           ` Ludovic Courtès
2010-09-06  9:39             ` Thien-Thi Nguyen
2010-09-06  9:59           ` Andy Wingo
2010-09-28 12:48             ` Thien-Thi Nguyen [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/guile/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87tylayqxn.fsf@ambire.localdomain \
    --to=ttn@gnuvola.org \
    --cc=guile-user@gnu.org \
    --cc=wingo@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).