From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark H Weaver Subject: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts Date: Wed, 02 Apr 2014 14:37:29 -0400 Message-ID: <871txfk26u.fsf@yeeloong.lan> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=-=-=" Return-path: Received: from eggs.gnu.org ([2001:4830:134:3::10]:33806) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WVQ3e-0008PO-Nn for guix-devel@gnu.org; Wed, 02 Apr 2014 14:38:47 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1WVQ3a-0005La-4N for guix-devel@gnu.org; Wed, 02 Apr 2014 14:38:42 -0400 Received: from world.peace.net ([96.39.62.75]:43286) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1WVQ3Z-0005LV-TJ for guix-devel@gnu.org; Wed, 02 Apr 2014 14:38:38 -0400 List-Id: "Development of GNU Guix and the GNU System distribution." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: guix-devel-bounces+gcggd-guix-devel=m.gmane.org@gnu.org Sender: guix-devel-bounces+gcggd-guix-devel=m.gmane.org@gnu.org To: guix-devel@gnu.org --=-=-= Content-Type: text/plain This patch makes union.scm identical to the last one I posted here, and removes the tests of procedures that no longer exist. Mark --=-=-= Content-Type: text/x-patch; charset=utf-8 Content-Disposition: inline; filename=0001-union-Rewrite-to-be-faster-handle-symlink-directory-.patch Content-Transfer-Encoding: quoted-printable Content-Description: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts >From 3f503705098745ddd54251a46b5634e78b209e5f Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Fri, 28 Mar 2014 03:54:01 -0400 Subject: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts. * guix/build/union.scm: Rewrite; only 'file=3D?' remains unchanged. Remove 'tree-union' and 'delete-duplicate-leaves' exports. Merge inputs in a breadth-first fashion. Follow symlinks for purposes of making decisions about the merge. * tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leave= s'. --- guix/build/union.scm | 252 +++++++++++++++++------------------------------= ---- tests/union.scm | 41 --------- 2 files changed, 85 insertions(+), 208 deletions(-) diff --git a/guix/build/union.scm b/guix/build/union.scm index 6e2b296..c65bea4 100644 --- a/guix/build/union.scm +++ b/guix/build/union.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright =C2=A9 2012, 2013, 2014 Ludovic Court=C3=A8s +;;; Copyright =C2=A9 2014 Mark H Weaver ;;; ;;; This file is part of GNU Guix. ;;; @@ -17,16 +18,13 @@ ;;; along with GNU Guix. If not, see . =20 (define-module (guix build union) - #:use-module (ice-9 ftw) #:use-module (ice-9 match) #:use-module (ice-9 format) #:use-module (srfi srfi-1) #:use-module (srfi srfi-26) #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) - #:export (tree-union - delete-duplicate-leaves - union-build)) + #:export (union-build)) =20 ;;; Commentary: ;;; @@ -35,72 +33,20 @@ ;;; ;;; Code: =20 -(define (tree-union trees) - "Return a tree that is the union of the trees listed in TREES. Each -tree has the form (PARENT LEAVES ...) or just LEAF, where each leaf is -itself a tree. " - (let loop ((trees trees)) - (match trees - (() ; nothing left - '()) - (_ - (let ((dirs (filter pair? trees)) - (leaves (remove pair? trees))) - `(,@leaves - ,@(fold (lambda (dir result) - (cons `(,dir - ,@(loop - (concatenate - (filter-map (match-lambda - ((head children ...) - (and (equal? head dir) - children))) - dirs)))) - result)) - '() - (delete-duplicates (map car dirs))))))))) - -(define* (delete-duplicate-leaves tree - #:optional - (leaf=3D? equal?) - (delete-duplicates (match-lambda - ((head _ ...) head))= )) - "Delete duplicate leaves from TREE. Two leaves are considered equal -when LEAF=3D? applied to them returns #t. Each collision (list of leaves -that are LEAF=3D?) is passed to DELETE-DUPLICATES, which must return a -single leaf." - (let loop ((tree tree)) - (match tree - ((dir children ...) - (let ((dirs (filter pair? children)) - (leaves (remove pair? children))) - (define collisions - (fold (lambda (leaf result) - (define same? - (cut leaf=3D? leaf <>)) - - (if (any (cut find same? <>) result) - result - (match (filter same? leaves) - ((_) - result) - ((collision ...) - (cons collision result))))) - '() - leaves)) - - (define non-collisions - (filter (lambda (leaf) - (match (filter (cut leaf=3D? leaf <>) leaves) - ((_) #t) - ((_ _ ..1) #f))) - leaves)) - - `(,dir - ,@non-collisions - ,@(map delete-duplicates collisions) - ,@(map loop dirs)))) - (leaf leaf)))) +(define (files-in-directory dirname) + (let ((dir (opendir dirname))) + (let loop ((files '())) + (match (readdir dir) + ((or "." "..") + (loop files)) + ((? eof-object?) + (closedir dir) + (sort files string) rest) - (format (current-error-port) "warning: collision encountered: ~{~= a ~}~%" - lst) - - ;; TODO: Implement smarter strategies. - (format (current-error-port) "warning: arbitrarily choosing ~a~%" - head)) - head))) +the INPUTS." + + (define (symlink* input output) + (format log-port "`~a' ~~> `~a'~%" input output) + (symlink input output)) + + (define (resolve-collisions output dirs files) + (cond ((null? dirs) + ;; The inputs are all files. + (format (current-error-port) + "warning: collision encountered: ~{~a ~}~%" + files) + + (let ((file (first files))) + ;; TODO: Implement smarter strategies. + (format (current-error-port) + "warning: arbitrarily choosing ~a~%" + file) + + (symlink* file output))) + + (else + ;; The inputs are a mixture of files and directories + (error "union-build: collision between file and directories" + `((files ,files) (dirs ,dirs)))))) + + (define (union output inputs) + (match inputs + ((input) + ;; There's only one input, so just make a link. + (symlink* input output)) + (_ + (call-with-values (lambda () (partition file-is-directory? inputs)) + (match-lambda* + ((dirs ()) + ;; All inputs are directories. Create a new directory + ;; where we will merge the input directories. + (mkdir output) + + ;; Build a hash table mapping each file to a list of input + ;; directories containing that file. + (let ((table (make-hash-table))) + + (define (add-to-table! file dir) + (hash-set! table file (cons dir (hash-ref table file '()))= )) + + ;; Populate the table. + (for-each (lambda (dir) + (for-each (cut add-to-table! <> dir) + (files-in-directory dir))) + dirs) + + ;; Now iterate over the table and recursively + ;; perform a union for each entry. + (hash-for-each (lambda (file dirs-with-file) + (union (string-append output "/" file) + (map (cut string-append <> "/" file) + (reverse dirs-with-file)))) + table))) + + ((() (file (? (cut file=3D? <> file)) ...)) + ;; There are no directories, and all files have the same conte= nts, + ;; so there's no conflict. + (symlink* file output)) + + ((dirs files) + (resolve-collisions output dirs files))))))) =20 (setvbuf (current-output-port) _IOLBF) (setvbuf (current-error-port) _IOLBF) (when (file-port? log-port) (setvbuf log-port _IOLBF)) =20 - (mkdir output) - (let loop ((tree (delete-duplicate-leaves - (cons "." - (tree-union - (append-map (compose tree-leaves file-tree) - (delete-duplicates directories)))) - leaf=3D? - resolve-collision)) - (dir '())) - (match tree - ((? string?) - ;; A leaf: create a symlink. - (let* ((dir (string-join dir "/")) - (target (string-append output "/" dir "/" (basename tree)))) - (format log-port "`~a' ~~> `~a'~%" tree target) - (symlink tree target))) - (((? string? subdir) leaves ...) - ;; A sub-directory: create it in OUTPUT, and iterate over LEAVES. - (unless (string=3D? subdir ".") - (let ((dir (string-join dir "/"))) - (mkdir (string-append output "/" dir "/" subdir)))) - (for-each (cute loop <> `(,@dir ,subdir)) - leaves)) - ((leaves ...) - ;; A series of leaves: iterate over them. - (for-each (cut loop <> dir) leaves))))) + (union output (delete-duplicates inputs))) =20 ;;; union.scm ends here diff --git a/tests/union.scm b/tests/union.scm index 3ebf483..f63329a 100644 --- a/tests/union.scm +++ b/tests/union.scm @@ -43,47 +43,6 @@ (test-begin "union") =20 -(test-equal "tree-union, empty" - '() - (tree-union '())) - -(test-equal "tree-union, leaves only" - '(a b c d) - (tree-union '(a b c d))) - -(test-equal "tree-union, simple" - '((bin ls touch make awk gawk)) - (tree-union '((bin ls touch) - (bin make) - (bin awk gawk)))) - -(test-equal "tree-union, several levels" - '((share (doc (make README) (coreutils README))) - (bin ls touch make)) - (tree-union '((bin ls touch) - (share (doc (coreutils README))) - (bin make) - (share (doc (make README)))))) - -(test-equal "delete-duplicate-leaves, default" - '(bin make touch ls) - (delete-duplicate-leaves '(bin ls make touch ls))) - -(test-equal "delete-duplicate-leaves, file names" - '("doc" ("info" - "/binutils/ld.info" - "/gcc/gcc.info" - "/binutils/standards.info")) - (let ((leaf=3D? (lambda (a b) - (string=3D? (basename a) (basename b))))) - (delete-duplicate-leaves '("doc" - ("info" - "/binutils/ld.info" - "/binutils/standards.info" - "/gcc/gcc.info" - "/gcc/standards.info")) - leaf=3D?))) - (test-skip (if (and %store (false-if-exception (getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV))) --=20 1.8.4 --=-=-=--