* module (database fcookie)
@ 2004-06-17 11:58 Thien-Thi Nguyen
0 siblings, 0 replies; only message in thread
From: Thien-Thi Nguyen @ 2004-06-17 11:58 UTC (permalink / raw)
Cc: guile-user
folks,
the module (database fcookie) below or more likely, a slightly improved
version, will appear in guile 1.4.1.99. see Notes at end for future
direction hint. w/ this module and a SMOP, i can now do:
mkdir -p ~/local/share/games/fortunes
cp -p /usr/share/games/fortunes/* ~/local/share/games/fortunes
apt-get --purge remove fortunes-min fortune-mod
12K for fcookie.scm, 124K stashed, 344K freed => 208K net savings.
thi
______________________________________________________________________
;;; fcookie.scm --- cookie file read, cookie-index file read/write
;; Copyright (C) 2004 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE. If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way. To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.
;;; Commentary:
;; This module provides two procs:
;; (create-index-file! out-name in-name delim flags) => #t for success
;; (fortune-cookie cookie-filename [dat-filename]) => string
;;
;; A fortune cookie file is a text file containing groups of lines separated
;; by "delim lines", a specially chosen character (traditionally the percent
;; `%' character) on a line by itself. The first and last lines must also be
;; delim lines. Each group of lines thus delimited is a "cookie". Thus, a
;; file with N cookies has N+1 delim lines.
;;
;; A fortune cookie index file is a binary file consisting of a six-word
;; header followed by the offset table (each entry a word). A word is four
;; bytes in network (big-endian) order. The header is:
;;
;; version -- typically 1 for old files and 2 for newer ones
;; count -- number of cookies
;; longest -- number of bytes of longest cookie
;; shortest -- number of bytes of shortest cookie
;; flags -- logior of #x1 (random)
;; #x2 (ordered)
;; #x4 (rotated)
;; delim-char -- this is shifted to the MSB position
;; (the remaining bytes are #\nul)
;;
;; Traditionally, for cookie file foo, the index file is named foo.dat, but
;; that is not required.
;;; Code:
(define-module (database fcookie)
#:use-module ((ice-9 rdelim) #:select (read-line))
#:use-module ((database binconv) #:select (integer-byte-string->integer
integer->integer-byte-string))
#:use-module ((scripts slurp) #:select (slurp slurp-file!))
#:export (create-index-file!
fortune-cookie))
(define sub make-shared-substring)
(define (k name)
(case name
((#:format-version) 2) ; hmmm
((#:sizeof-word) 4)
((#:all-flags) '(#:random #:ordered #:rotated))
((#:random) #x1)
((#:ordered) #x2)
((#:rotated) #x4)
((#:bits-per-byte) 8) ; you never know...
((#:delim+newline) 2)
((#:all-headers) '(#:version #:count #:longest #:shortest
#:flags #:delim #:filler))))
(define (read-word-proc port)
(let ((s (make-string (k #:sizeof-word))))
(lambda ()
(string-set! s 0 (read-char port))
(string-set! s 1 (read-char port))
(string-set! s 2 (read-char port))
(string-set! s 3 (read-char port))
(integer-byte-string->integer s #f #t))))
(define (write-word-proc port)
(let* ((sz (k #:sizeof-word))
(s (make-string sz)))
(lambda (n)
(display (integer->integer-byte-string n sz #f #t s) port))))
(define (words<-cookie-file filename delim flags)
(let* ((p (open-input-file filename))
(next (lambda () (read-line p 'concat)))
(count -1) (shortest #f) (longest #f)
(box (list #f))
(tp box)
(order? (memq #:ordered flags)))
;; If `order?', accumulate (OFFSET . COOKIE-TRIMMED-TO-FIRST-ALPHANUMERIC),
;; then sort ascending. Otherwise, accumulate OFFSET only.
(let loop ((line (next)) (acc '()))
(cond ((eof-object? line)
(close-port p)
(list* ; rv
;; header
(k #:format-version)
count
longest
shortest
(apply logior (map (lambda (flag)
(if (memq flag flags)
(k flag)
0))
(k #:all-flags)))
(ash (char->integer delim)
(* (1- (k #:sizeof-word))
(k #:bits-per-byte)))
;; offset table
(if order?
(map car (sort (cdr box) (lambda (a b)
(string<? (cdr a) (cdr b)))))
(cdr box))))
((and (= (k #:delim+newline) (string-length line))
(char=? delim (string-ref line 0)))
(let ((ofs (seek p 0 SEEK_CUR))
(prev (car tp)))
(and prev
(let ((len (- ofs
(if order?
(car prev)
prev)
(k #:delim+newline))))
(cond ((not shortest)
(set! shortest len)
(set! longest len))
(else
(set! shortest (min shortest len))
(set! longest (max longest len))))))
(set-cdr! tp (list
(if order?
(cons ofs
(let* ((full (apply string-append
(reverse acc)))
(len (string-length full)))
(do ((i 0 (1+ i)))
((or (= len i)
(let ((c (string-ref full i)))
(or (char-alphabetic? c)
(char-numeric? c))))
(sub full i)))))
ofs))))
(set! tp (cdr tp))
(set! count (1+ count))
(loop (next) '()))
(else
(loop (next) (cons line acc)))))))
;; Create index file @var{out-name} from cookie file @var{in-name}, separating
;; cookies by looking for char @var{delim} on a line by itself. Optional
;; @var{flags} are keywords:
;;
;; @table @code
;; @item #:random
;; Set bit 0 (corresponding to a mask of #x1) in the flags word in the header,
;; but do nothing else at the moment (FIXME).
;;
;; @item #:ordered
;; Set bit 1 (corresponding to a mask of #x2) in the flags word in the header,
;; and order the offsets by sorting the cookies with @code{string<?}, ignoring
;; non-alphanumeric leading characters.
;;
;; @item #:rotated
;; Set bit 2 (corresponding to a mask of #x4) in the flags word in the header,
;; to note that the cookies are @dfn{ROT13}.
;; @end table
;;
;; Return #t on success.
;;
(define (create-index-file! out-name in-name delim . flags)
(let* ((words (words<-cookie-file in-name delim flags))
(outp (open-output-file out-name))
(ww (write-word-proc outp)))
(for-each ww words)
(close-port outp)))
(define (grok-header readc readw)
(let ((info (map cons
(k #:all-headers)
(list (readw)
(readw)
(readw)
(readw)
(let ((w (readw)))
(let loop ((ls (k #:all-flags)) (acc '()))
(if (null? ls)
acc
(let ((flag (car ls)))
(loop (cdr ls)
(if (= 0 (logand (k flag) w))
acc
(cons flag acc)))))))
(readc)
(map (lambda ignored
(readc))
(iota (1- (k #:sizeof-word))))))))
;; rv
(lambda (name)
(assq-ref info name))))
(define (get-cookie cookie-file dat-file)
(let* ((port (open-input-file dat-file))
(readw (read-word-proc port))
(qh (grok-header (lambda () (read-char port)) readw))
(selection (random (qh #:count)))
(start (do ((i 0 (1+ i)))
((= i selection) (readw))
(readw)))
(need-scan? (memq #:ordered (qh #:flags)))
(len (if need-scan?
(min (qh #:longest)
(- (stat:size (stat cookie-file)) start))
(- (readw) start (k #:delim+newline))))
(cookie (make-string len)))
(close-port port)
(slurp-file! cookie cookie-file start len 0)
(and need-scan?
(let ((delim (qh #:delim))
(cookie-char=? (lambda (c n)
(char=? c (string-ref cookie n)))))
(let loop ((nl (string-index cookie #\nl 0)))
(and nl (if (and (<= (+ nl (k #:delim+newline)) len)
(cookie-char=? delim (1+ nl))
(cookie-char=? #\nl (+ nl 2)))
(set! cookie (sub cookie 0 (1+ nl)))
(loop (string-index cookie #\nl (1+ nl))))))))
(if (memq #:rotated (qh #:flags))
(list->string
(let ((a-n (char->integer #\a))
(A-n (char->integer #\A))
(rot (lambda (base n)
(integer->char (+ base (modulo (+ (- n base) 13) 26))))))
(map (lambda (c)
(let ((n (char->integer c)))
(cond ((char<=? #\a c #\z) (rot a-n n))
((char<=? #\A c #\Z) (rot A-n n))
(else c))))
(string->list cookie))))
cookie)))
;; Return a randomly-chosen string extracted from @var{cookie-file},
;; using the index file named by appending @file{.dat} to @var{cookie-file}.
;; Optional arg @var{dat-file} specifies the index file to use instead of the
;; default.
;;
;;-sig: (cookie-file [dat-file])
;;
(define (fortune-cookie cookie-file . dat-file)
(get-cookie cookie-file (if (null? dat-file)
(string-append cookie-file ".dat")
(car dat-file))))
;; Notes
;;
;; This module is actually a practice run (fodder for factoring) for some
;; future `(database isam)' module, since it does in fact implement indexed
;; sequential access methods --- not very sophisticated but it's a start.
;; And who can resist a fortune cookie?
;;; fcookie.scm ends here
_______________________________________________
Guile-user mailing list
Guile-user@gnu.org
http://lists.gnu.org/mailman/listinfo/guile-user
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2004-06-17 11:58 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-06-17 11:58 module (database fcookie) Thien-Thi Nguyen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).