;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2020 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (tar) #:use-module (ice-9 match) #:use-module (ice-9 binary-ports) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-26) #:use-module (gcrypt hash) #:use-module (guix base16) #:use-module (guix base32) #:use-module ((ice-9 rdelim) #:select ((read-string . get-string-all))) #:use-module (web client) #:use-module (web response) #:export (disassemble-archive assemble-archive)) ;;; ;;; Tar. ;;; (define %TMAGIC "ustar\0") (define %TVERSION "00") (define-syntax-rule (define-field-type type type-size read-proc write-proc) "Define TYPE as a ustar header field type of TYPE-SIZE bytes. READ-PROC is the procedure to obtain the value of an object of this type froma bytevector, and WRITE-PROC writes it to a bytevector." (define-syntax type (syntax-rules (read write size) ((_ size) type-size) ((_ read) read-proc) ((_ write) write-proc)))) (define (sub-bytevector bv offset size) (let ((sub (make-bytevector size))) (bytevector-copy! bv offset sub 0 size) sub)) (define (read-integer bv offset len) (string->number (read-string bv offset len) 8)) (define read-integer12 (cut read-integer <> <> 12)) (define read-integer8 (cut read-integer <> <> 8)) (define (read-string bv offset max-len) (define len (let loop ((len 0)) (cond ((= len max-len) len) ((zero? (bytevector-u8-ref bv (+ offset len))) len) (else (loop (+ 1 len)))))) (utf8->string (sub-bytevector bv offset len))) (define read-string155 (cut read-string <> <> 155)) (define read-string100 (cut read-string <> <> 100)) (define read-string32 (cut read-string <> <> 32)) (define read-string6 (cut read-string <> <> 6)) (define read-string2 (cut read-string <> <> 2)) (define (read-character bv offset) (integer->char (bytevector-u8-ref bv offset))) (define (read-padding12 bv offset) (bytevector-uint-ref bv offset (endianness big) 12)) (define (write-integer! bv offset value len) (let ((str (string-pad (number->string value 8) (- len 1) #\0))) (write-string! bv offset str len))) (define write-integer12! (cut write-integer! <> <> <> 12)) (define write-integer8! (cut write-integer! <> <> <> 8)) (define (write-string! bv offset str len) (let* ((str (string-pad-right str len #\nul)) (buf (string->utf8 str))) (bytevector-copy! buf 0 bv offset (bytevector-length buf)))) (define write-string155! (cut write-string! <> <> <> 155)) (define write-string100! (cut write-string! <> <> <> 100)) (define write-string32! (cut write-string! <> <> <> 32)) (define write-string6! (cut write-string! <> <> <> 6)) (define write-string2! (cut write-string! <> <> <> 2)) (define (write-character! bv offset value) (bytevector-u8-set! bv offset (char->integer value))) (define (write-padding12! bv offset value) (bytevector-uint-set! bv offset value (endianness big) 12)) (define-field-type integer12 12 read-integer12 write-integer12!) (define-field-type integer8 8 read-integer8 write-integer8!) (define-field-type character 1 read-character write-character!) (define-field-type string155 155 read-string155 write-string155!) (define-field-type string100 100 read-string100 write-string100!) (define-field-type string32 32 read-string32 write-string32!) (define-field-type string6 6 read-string6 write-string6!) (define-field-type string2 2 read-string2 write-string2!) (define-field-type padding12 12 read-padding12 write-padding12!) (define-syntax define-pack (syntax-rules () ((_ type ctor pred write-header read-header (field-names field-types field-getters) ...) (begin (define-record-type type (ctor field-names ...) pred (field-names field-getters) ...) (define (read-header port) "Return the ustar header read from PORT." (set-port-encoding! port "ISO-8859-1") (let ((bv (get-bytevector-n port (+ (field-types size) ...)))) (letrec-syntax ((build (syntax-rules () ((_ bv () offset (fields (... ...))) (ctor fields (... ...))) ((_ bv (type0 types (... ...)) offset (fields (... ...))) (build bv (types (... ...)) (+ offset (type0 size)) (fields (... ...) ((type0 read) bv offset))))))) (build bv (field-types ...) 0 ())))) (define (write-header header port) "Serialize HEADER, a record, to PORT." (let* ((len (+ (field-types size) ...)) (bv (make-bytevector len))) (match header (($ type field-names ...) (letrec-syntax ((write! (syntax-rules () ((_ () offset) #t) ((_ ((type value) rest (... ...)) offset) (begin ((type write) bv offset value) (write! (rest (... ...)) (+ offset (type size)))))))) (write! ((field-types field-names) ...) 0) (put-bytevector port bv)))))))))) ;; The ustar header. See . (define-pack %make-ustar-header ustar-header? write-ustar-header read-ustar-header (name string100 ustar-header-name) ;NUL-terminated if NUL fits (mode integer8 ustar-header-mode) (uid integer8 ustar-header-uid) (gid integer8 ustar-header-gid) (size integer12 ustar-header-size) (mtime integer12 ustar-header-mtime) (chksum integer8 ustar-header-checksum) (typeflag character ustar-header-type-flag) (linkname string100 ustar-header-link-name) (magic string6 ustar-header-magic) ;must be TMAGIC (version string2 ustar-header-version) ;must be TVERSION (uname string32 ustar-header-uname) ;NUL-terminated (gname string32 ustar-header-gname) ;NUL-terminated (devmajor integer8 ustar-header-device-major) (devminor integer8 ustar-header-device-minor) (prefix string155 ustar-header-prefix) ;NUL-terminated if NUL fits (padding padding12 ustar-header-padding)) (define* (make-ustar-header name #:key (mode 0) (uid 0) (gid 0) (size 0) (mtime 0) (checksum 0) (type-flag 0) (link-name "") (magic %TMAGIC) (version %TVERSION) (uname "") (gname "") (device-major 0) (device-minor 0) (prefix "") (padding 0)) (%make-ustar-header name mode uid gid size mtime checksum type-flag link-name magic version uname gname device-major device-minor prefix padding)) (define %zero-header ;; The all-zeros header, which marks the end of stream. (read-ustar-header (open-bytevector-input-port (make-bytevector 512 0)))) (define (consumer port) "Return a procedure that consumes or skips the given number of bytes from PORT." (if (false-if-exception (seek port 0 SEEK_CUR)) (lambda (len) (seek port len SEEK_CUR)) (lambda (len) (define bv (make-bytevector 8192)) (let loop ((len len)) (define block (min len (bytevector-length bv))) (unless (or (zero? block) (eof-object? (get-bytevector-n! port bv 0 block))) (loop (- len block))))))) (define (fold-archive proc seed port) "Read ustar headers from PORT; for each header, call PROC." (define skip (consumer port)) (let loop ((result seed)) (define header (read-ustar-header port)) (if (equal? header %zero-header) result (let* ((result (proc header port result)) (size (ustar-header-size header)) (remainder (modulo size 512))) ;; It's up to PROC to consume the SIZE bytes of data corresponding ;; to HEADER. Here we consume padding. (unless (zero? remainder) (skip (- 512 remainder))) (loop result))))) ;;; ;;; Disassembling/assembling an archive. ;;; (define (dump in out size) "Copy SIZE bytes from IN to OUT." (define buf-size 65536) (define buf (make-bytevector buf-size)) (let loop ((left size)) (if (<= left 0) 0 (let ((read (get-bytevector-n! in buf 0 (min left buf-size)))) (if (eof-object? read) left (begin (put-bytevector out buf 0 read) (loop (- left read)))))))) (define* (disassemble-archive port #:optional (algorithm (hash-algorithm sha256))) "Read tar archive from PORT and return an sexp representing its metadata, including individual file hashes with ALGORITHM." (define headers+hashes (fold-archive (lambda (header port result) (if (zero? (ustar-header-size header)) (alist-cons header #f result) (let () (define-values (hash-port get-hash) (open-hash-port algorithm)) (dump port hash-port (ustar-header-size header)) (close-port hash-port) (alist-cons header (get-hash) result)))) '() port)) (define header+hash->sexp (match-lambda ((header . hash) (letrec-syntax ((serialize (syntax-rules () ((_) '()) ((_ (tag get default) rest ...) (let ((value (get header))) (append (if (equal? default value) '() `((tag ,value))) (serialize rest ...)))) ((_ (tag get) rest ...) (append `((tag ,(get header))) (serialize rest ...)))))) `(,(ustar-header-name header) ,@(serialize (mode ustar-header-mode) (uid ustar-header-uid 0) (gid ustar-header-gid 0) (size ustar-header-size) (mtime ustar-header-mtime) (chksum ustar-header-checksum) (typeflag ustar-header-type-flag #\nul) (linkname ustar-header-link-name "") (magic ustar-header-magic "") (version ustar-header-version "") (uname ustar-header-uname "") (gname ustar-header-gname "") (devmajor ustar-header-device-major 0) (devminor ustar-header-device-minor 0) (prefix ustar-header-prefix "") (padding ustar-header-padding 0) (hash (lambda (_) (and hash `(,(hash-algorithm-name algorithm) ,(bytevector->base32-string hash)))) #f))))))) `(tar-source (version 0) (headers ,(map header+hash->sexp (reverse headers+hashes))))) (define (fetch-from-swh algorithm hash) (define url (string-append "https://archive.softwareheritage.org/api/1/content/" (symbol->string algorithm) ":" (bytevector->base16-string hash) "/raw/")) (define-values (response port) (http-get url #:streaming? #t #:verify-certificate? #f)) (if (= 200 (response-code response)) port (throw 'swh-fetch-error url (get-string-all port)))) (define* (assemble-archive source port #:optional (fetch-data fetch-from-swh)) "Assemble archive from SOURCE, an sexp as returned by 'disassemble-archive'." (define sexp->header (match-lambda ((name . properties) (let ((ref (lambda (field) (and=> (assq-ref properties field) car)))) (make-ustar-header name #:mode (ref 'mode) #:uid (or (ref 'uid) 0) #:gid (or (ref 'gid) 0) #:size (ref 'size) #:mtime (ref 'mtime) #:checksum (ref 'chksum) #:type-flag (or (ref 'typeflag) #\nul) #:link-name (or (ref 'linkname) "") #:magic (or (ref 'magic) "") #:version (or (ref 'version) "") #:uname (or (ref 'uname) "") #:gname (or (ref 'gname) "") #:device-major (or (ref 'devmajor) 0) #:device-minor (or (ref 'devminor) 0) #:prefix (or (ref 'prefix) "") #:padding (or (ref 'padding) 0)))))) (define sexp->data (match-lambda ((name . properties) (match (assq-ref properties 'hash) (((algorithm (= base32-string->bytevector hash)) _ ...) (fetch-data algorithm hash)) (#f (open-input-string "")))))) (match source (('tar-source ('version 0) ('headers headers) _ ...) (for-each (lambda (sexp) (let ((header (sexp->header sexp)) (data (sexp->data sexp))) (write-ustar-header header port) (dump-port data port) (close-port data))) headers))))