unofficial mirror of guix-devel@gnu.org 
 help / color / mirror / code / Atom feed
* [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts
@ 2014-04-02 18:37 Mark H Weaver
  2014-04-02 19:20 ` Ludovic Courtès
  2014-04-03 20:15 ` Ludovic Courtès
  0 siblings, 2 replies; 5+ messages in thread
From: Mark H Weaver @ 2014-04-02 18:37 UTC (permalink / raw)
  To: guix-devel

[-- Attachment #1: Type: text/plain, Size: 138 bytes --]

This patch makes union.scm identical to the last one I posted here, and
removes the tests of procedures that no longer exist.

     Mark


[-- Attachment #2: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts --]
[-- Type: text/x-patch, Size: 14233 bytes --]

From 3f503705098745ddd54251a46b5634e78b209e5f Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Fri, 28 Mar 2014 03:54:01 -0400
Subject: [PATCH] union: Rewrite to be faster; handle symlink/directory
 conflicts.

* guix/build/union.scm: Rewrite; only 'file=?' remains unchanged.  Remove
  'tree-union' and 'delete-duplicate-leaves' exports.  Merge inputs in a
  breadth-first fashion.  Follow symlinks for purposes of making decisions
  about the merge.

* tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leaves'.
---
 guix/build/union.scm | 252 +++++++++++++++++----------------------------------
 tests/union.scm      |  41 ---------
 2 files changed, 85 insertions(+), 208 deletions(-)

diff --git a/guix/build/union.scm b/guix/build/union.scm
index 6e2b296..c65bea4 100644
--- a/guix/build/union.scm
+++ b/guix/build/union.scm
@@ -1,5 +1,6 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2013, 2014 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2014 Mark H Weaver <mhw@netris.org>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -17,16 +18,13 @@
 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
 (define-module (guix build union)
-  #:use-module (ice-9 ftw)
   #:use-module (ice-9 match)
   #:use-module (ice-9 format)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-26)
   #:use-module (rnrs bytevectors)
   #:use-module (rnrs io ports)
-  #:export (tree-union
-            delete-duplicate-leaves
-            union-build))
+  #:export (union-build))
 
 ;;; Commentary:
 ;;;
@@ -35,72 +33,20 @@
 ;;;
 ;;; Code:
 
-(define (tree-union trees)
-  "Return a tree that is the union of the trees listed in TREES.  Each
-tree has the form (PARENT LEAVES ...) or just LEAF, where each leaf is
-itself a tree. "
-  (let loop ((trees trees))
-    (match trees
-      (()                                         ; nothing left
-       '())
-      (_
-       (let ((dirs   (filter pair? trees))
-             (leaves (remove pair? trees)))
-         `(,@leaves
-           ,@(fold (lambda (dir result)
-                     (cons `(,dir
-                             ,@(loop
-                                (concatenate
-                                 (filter-map (match-lambda
-                                              ((head children ...)
-                                               (and (equal? head dir)
-                                                    children)))
-                                             dirs))))
-                           result))
-                   '()
-                   (delete-duplicates (map car dirs)))))))))
-
-(define* (delete-duplicate-leaves tree
-                                  #:optional
-                                  (leaf=? equal?)
-                                  (delete-duplicates (match-lambda
-                                                      ((head _ ...) head))))
-  "Delete duplicate leaves from TREE.  Two leaves are considered equal
-when LEAF=? applied to them returns #t.  Each collision (list of leaves
-that are LEAF=?) is passed to DELETE-DUPLICATES, which must return a
-single leaf."
-  (let loop ((tree tree))
-    (match tree
-      ((dir children ...)
-       (let ((dirs   (filter pair? children))
-             (leaves (remove pair? children)))
-         (define collisions
-           (fold (lambda (leaf result)
-                   (define same?
-                     (cut leaf=? leaf <>))
-
-                   (if (any (cut find same? <>) result)
-                       result
-                       (match (filter same? leaves)
-                         ((_)
-                          result)
-                         ((collision ...)
-                          (cons collision result)))))
-                 '()
-                 leaves))
-
-         (define non-collisions
-           (filter (lambda (leaf)
-                     (match (filter (cut leaf=? leaf <>) leaves)
-                       ((_) #t)
-                       ((_ _ ..1) #f)))
-                   leaves))
-
-         `(,dir
-           ,@non-collisions
-           ,@(map delete-duplicates collisions)
-           ,@(map loop dirs))))
-      (leaf leaf))))
+(define (files-in-directory dirname)
+  (let ((dir (opendir dirname)))
+    (let loop ((files '()))
+      (match (readdir dir)
+        ((or "." "..")
+         (loop files))
+        ((? eof-object?)
+         (closedir dir)
+         (sort files string<?))
+        (file
+         (loop (cons file files)))))))
+
+(define (file-is-directory? file)
+  (eq? 'directory (stat:type (stat file))))
 
 (define (file=? file1 file2)
   "Return #t if FILE1 and FILE2 are regular files and their contents are
@@ -124,110 +70,82 @@ identical, #f otherwise."
                           (or (eof-object? n1)
                               (loop))))))))))))
 
-(define* (union-build output directories
+(define* (union-build output inputs
                       #:key (log-port (current-error-port)))
   "Build in the OUTPUT directory a symlink tree that is the union of all
-the DIRECTORIES."
-  (define (file-tree dir)
-    ;; Return the contents of DIR as a tree.
-
-    (define (others-have-it? subdir)
-      ;; Return #t if other elements of DIRECTORIES have SUBDIR.
-      (let ((subdir (substring subdir (string-length dir))))
-        (any (lambda (other)
-               (and (not (string=? other dir))
-                    (file-exists? (string-append other "/" subdir))))
-             directories)))
-
-    (match (file-system-fold (lambda (subdir stat result) ; enter?
-                               ;; No need to traverse DIR since there's
-                               ;; nothing to union it with.  Thus, we avoid
-                               ;; creating a gazillon symlinks (think
-                               ;; share/emacs/24.3, share/texmf, etc.)
-                               (or (string=? subdir dir)
-                                   (others-have-it? subdir)))
-                             (lambda (file stat result) ; leaf
-                               (match result
-                                 (((siblings ...) rest ...)
-                                  `((,file ,@siblings) ,@rest))))
-                             (lambda (dir stat result)  ; down
-                               `(() ,@result))
-                             (lambda (dir stat result)  ; up
-                               (match result
-                                 (((leaves ...) (siblings ...) rest ...)
-                                  `(((,(basename dir) ,@leaves) ,@siblings)
-                                    ,@rest))))
-                             (lambda (dir stat result)  ; skip
-                               ;; DIR is not available elsewhere, so treat it
-                               ;; as a leaf.
-                               (match result
-                                 (((siblings ...) rest ...)
-                                  `((,dir ,@siblings) ,@rest))))
-                             (lambda (file stat errno result)
-                               (format (current-error-port) "union-build: ~a: ~a~%"
-                                       file (strerror errno)))
-                             '(())
-                             dir)
-      (((tree)) tree)
-      (()       #f)))
-
-  (define tree-leaves
-    ;; Return the leaves of the given tree.
-    (match-lambda
-     (((? string?) leaves ...)
-      leaves)))
-
-  (define (leaf=? a b)
-    (equal? (basename a) (basename b)))
-
-  (define (resolve-collision leaves)
-    ;; LEAVES all have the same basename, so choose one of them.
-    (match (delete-duplicates leaves string=?)
-      ((one-and-the-same)
-       ;; LEAVES all actually point to the same file, so nothing to worry
-       ;; about.
-       one-and-the-same)
-      ((and lst (head rest ...))
-       ;; A real collision, unless those files are all identical.
-       (unless (every (cut file=? head <>) rest)
-         (format (current-error-port) "warning: collision encountered: ~{~a ~}~%"
-                 lst)
-
-         ;; TODO: Implement smarter strategies.
-         (format (current-error-port) "warning: arbitrarily choosing ~a~%"
-                 head))
-       head)))
+the INPUTS."
+
+  (define (symlink* input output)
+    (format log-port "`~a' ~~> `~a'~%" input output)
+    (symlink input output))
+
+  (define (resolve-collisions output dirs files)
+    (cond ((null? dirs)
+           ;; The inputs are all files.
+           (format (current-error-port)
+                   "warning: collision encountered: ~{~a ~}~%"
+                   files)
+
+           (let ((file (first files)))
+             ;; TODO: Implement smarter strategies.
+             (format (current-error-port)
+                     "warning: arbitrarily choosing ~a~%"
+                     file)
+
+             (symlink* file output)))
+
+          (else
+           ;; The inputs are a mixture of files and directories
+           (error "union-build: collision between file and directories"
+                  `((files ,files) (dirs ,dirs))))))
+
+  (define (union output inputs)
+    (match inputs
+      ((input)
+       ;; There's only one input, so just make a link.
+       (symlink* input output))
+      (_
+       (call-with-values (lambda () (partition file-is-directory? inputs))
+         (match-lambda*
+           ((dirs ())
+            ;; All inputs are directories.  Create a new directory
+            ;; where we will merge the input directories.
+            (mkdir output)
+
+            ;; Build a hash table mapping each file to a list of input
+            ;; directories containing that file.
+            (let ((table (make-hash-table)))
+
+              (define (add-to-table! file dir)
+                (hash-set! table file (cons dir (hash-ref table file '()))))
+
+              ;; Populate the table.
+              (for-each (lambda (dir)
+                          (for-each (cut add-to-table! <> dir)
+                                    (files-in-directory dir)))
+                        dirs)
+
+              ;; Now iterate over the table and recursively
+              ;; perform a union for each entry.
+              (hash-for-each (lambda (file dirs-with-file)
+                               (union (string-append output "/" file)
+                                      (map (cut string-append <> "/" file)
+                                           (reverse dirs-with-file))))
+                             table)))
+
+           ((() (file (? (cut file=? <> file)) ...))
+            ;; There are no directories, and all files have the same contents,
+            ;; so there's no conflict.
+            (symlink* file output))
+
+           ((dirs files)
+            (resolve-collisions output dirs files)))))))
 
   (setvbuf (current-output-port) _IOLBF)
   (setvbuf (current-error-port) _IOLBF)
   (when (file-port? log-port)
     (setvbuf log-port _IOLBF))
 
-  (mkdir output)
-  (let loop ((tree (delete-duplicate-leaves
-                    (cons "."
-                          (tree-union
-                           (append-map (compose tree-leaves file-tree)
-                                       (delete-duplicates directories))))
-                    leaf=?
-                    resolve-collision))
-             (dir  '()))
-    (match tree
-      ((? string?)
-       ;; A leaf: create a symlink.
-       (let* ((dir    (string-join dir "/"))
-              (target (string-append output "/" dir "/" (basename tree))))
-         (format log-port "`~a' ~~> `~a'~%" tree target)
-         (symlink tree target)))
-      (((? string? subdir) leaves ...)
-       ;; A sub-directory: create it in OUTPUT, and iterate over LEAVES.
-       (unless (string=? subdir ".")
-         (let ((dir (string-join dir "/")))
-           (mkdir (string-append output "/" dir "/" subdir))))
-       (for-each (cute loop <> `(,@dir ,subdir))
-                 leaves))
-      ((leaves ...)
-       ;; A series of leaves: iterate over them.
-       (for-each (cut loop <> dir) leaves)))))
+  (union output (delete-duplicates inputs)))
 
 ;;; union.scm ends here
diff --git a/tests/union.scm b/tests/union.scm
index 3ebf483..f63329a 100644
--- a/tests/union.scm
+++ b/tests/union.scm
@@ -43,47 +43,6 @@
 \f
 (test-begin "union")
 
-(test-equal "tree-union, empty"
-  '()
-  (tree-union '()))
-
-(test-equal "tree-union, leaves only"
-  '(a b c d)
-  (tree-union '(a b c d)))
-
-(test-equal "tree-union, simple"
-  '((bin ls touch make awk gawk))
-  (tree-union '((bin ls touch)
-                (bin make)
-                (bin awk gawk))))
-
-(test-equal "tree-union, several levels"
-  '((share (doc (make README) (coreutils README)))
-    (bin ls touch make))
-  (tree-union '((bin ls touch)
-                (share (doc (coreutils README)))
-                (bin make)
-                (share (doc (make README))))))
-
-(test-equal "delete-duplicate-leaves, default"
-  '(bin make touch ls)
-  (delete-duplicate-leaves '(bin ls make touch ls)))
-
-(test-equal "delete-duplicate-leaves, file names"
-  '("doc" ("info"
-           "/binutils/ld.info"
-           "/gcc/gcc.info"
-           "/binutils/standards.info"))
-  (let ((leaf=? (lambda (a b)
-                  (string=? (basename a) (basename b)))))
-    (delete-duplicate-leaves '("doc"
-                               ("info"
-                                "/binutils/ld.info"
-                                "/binutils/standards.info"
-                                "/gcc/gcc.info"
-                                "/gcc/standards.info"))
-                             leaf=?)))
-
 (test-skip (if (and %store
                     (false-if-exception
                      (getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV)))
-- 
1.8.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts
  2014-04-02 18:37 [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts Mark H Weaver
@ 2014-04-02 19:20 ` Ludovic Courtès
  2014-04-03 20:15 ` Ludovic Courtès
  1 sibling, 0 replies; 5+ messages in thread
From: Ludovic Courtès @ 2014-04-02 19:20 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guix-devel

Mark H Weaver <mhw@netris.org> skribis:

> This patch makes union.scm identical to the last one I posted here, and
> removes the tests of procedures that no longer exist.

Cool, please push!  I’ll look into adding some tests.

Ludo’.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts
  2014-04-02 18:37 [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts Mark H Weaver
  2014-04-02 19:20 ` Ludovic Courtès
@ 2014-04-03 20:15 ` Ludovic Courtès
  2014-04-03 22:04   ` [PATCH] union: Ensure that the output is always a directory Mark H Weaver
  1 sibling, 1 reply; 5+ messages in thread
From: Ludovic Courtès @ 2014-04-03 20:15 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guix-devel

Mark H Weaver <mhw@netris.org> skribis:

> From 3f503705098745ddd54251a46b5634e78b209e5f Mon Sep 17 00:00:00 2001
> From: Mark H Weaver <mhw@netris.org>
> Date: Fri, 28 Mar 2014 03:54:01 -0400
> Subject: [PATCH] union: Rewrite to be faster; handle symlink/directory
>  conflicts.
>
> * guix/build/union.scm: Rewrite; only 'file=?' remains unchanged.  Remove
>   'tree-union' and 'delete-duplicate-leaves' exports.  Merge inputs in a
>   breadth-first fashion.  Follow symlinks for purposes of making decisions
>   about the merge.
>
> * tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leaves'.

I stumbled upon this bug: building a profile with a single package
fails, because the new union.scm makes the profile directory a symlink
to that package, which then prevents the creation of the ‘manifest’
file:

--8<---------------cut here---------------start------------->8---
$ guix package -i idutils -p foo
The following package will be installed:        
   idutils-4.6  out     /gnu/store/p86dq5svczkmk2ym8rd8xvvv9089hq51-idutils-4.6

The following derivation will be built:
   /gnu/store/9iyyi8mzggmycs27wsmsgx7fn3sfq40l-profile.drv

[...]

 883: 1 [call-with-output-file "/gnu/store/3za8jyx93m12lzn0278d6m8bprni2nrn-profile/manifest" ...]
In unknown file:
   ?: 0 [open-file "/gnu/store/3za8jyx93m12lzn0278d6m8bprni2nrn-profile/manifest" ...]

ERROR: In procedure open-file:
ERROR: In procedure open-file: Permission denied: "/gnu/store/3za8jyx93m12lzn0278d6m8bprni2nrn-profile/manifest"
builder for `/gnu/store/9iyyi8mzggmycs27wsmsgx7fn3sfq40l-profile.drv' failed with exit code 1
@ build-failed /gnu/store/9iyyi8mzggmycs27wsmsgx7fn3sfq40l-profile.drv - 1 builder for `/gnu/store/9iyyi8mzggmycs27wsmsgx7fn3sfq40l-profile.drv' failed with exit code 1
guix package: error: build failed: build of `/gnu/store/9iyyi8mzggmycs27wsmsgx7fn3sfq40l-profile.drv' failed
--8<---------------cut here---------------end--------------->8---

(We can’t catch it in the test suite because tests work on a local store
that is writable.)

Could you look into it?

Ludo’.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] union: Ensure that the output is always a directory
  2014-04-03 20:15 ` Ludovic Courtès
@ 2014-04-03 22:04   ` Mark H Weaver
  2014-04-03 22:10     ` Ludovic Courtès
  0 siblings, 1 reply; 5+ messages in thread
From: Mark H Weaver @ 2014-04-03 22:04 UTC (permalink / raw)
  To: Ludovic Courtès; +Cc: guix-devel

[-- Attachment #1: Type: text/plain, Size: 350 bytes --]

ludo@gnu.org (Ludovic Courtès) writes:

> I stumbled upon this bug: building a profile with a single package
> fails, because the new union.scm makes the profile directory a symlink
> to that package, which then prevents the creation of the ‘manifest’
> file:

Indeed!  Here's a patch that fixes that problem.

   Thanks,
     Mark


[-- Attachment #2: [PATCH] union: Ensure that the output is always a directory --]
[-- Type: text/x-patch, Size: 3943 bytes --]

From 6c8582bc41c8935b8e9d63146595e87d9b92fa95 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <mhw@netris.org>
Date: Thu, 3 Apr 2014 17:49:20 -0400
Subject: [PATCH] union: Ensure that the output is always a directory.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the creation of single-package profiles, reported by Ludovic Courtès.

* guix/build/union.scm (union-build): Add new internal procedure
  'union-of-directories' that always creates a directory, containing the code
  previously used only to merge multiple directories.  Call it from the
  multiple-directory case in 'union' and from the top-level 'union-build'.
---
 guix/build/union.scm | 53 +++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/guix/build/union.scm b/guix/build/union.scm
index c65bea4..ccd2d5c 100644
--- a/guix/build/union.scm
+++ b/guix/build/union.scm
@@ -108,30 +108,8 @@ the INPUTS."
        (call-with-values (lambda () (partition file-is-directory? inputs))
          (match-lambda*
            ((dirs ())
-            ;; All inputs are directories.  Create a new directory
-            ;; where we will merge the input directories.
-            (mkdir output)
-
-            ;; Build a hash table mapping each file to a list of input
-            ;; directories containing that file.
-            (let ((table (make-hash-table)))
-
-              (define (add-to-table! file dir)
-                (hash-set! table file (cons dir (hash-ref table file '()))))
-
-              ;; Populate the table.
-              (for-each (lambda (dir)
-                          (for-each (cut add-to-table! <> dir)
-                                    (files-in-directory dir)))
-                        dirs)
-
-              ;; Now iterate over the table and recursively
-              ;; perform a union for each entry.
-              (hash-for-each (lambda (file dirs-with-file)
-                               (union (string-append output "/" file)
-                                      (map (cut string-append <> "/" file)
-                                           (reverse dirs-with-file))))
-                             table)))
+            ;; All inputs are directories.
+            (union-of-directories output dirs))
 
            ((() (file (? (cut file=? <> file)) ...))
             ;; There are no directories, and all files have the same contents,
@@ -141,11 +119,36 @@ the INPUTS."
            ((dirs files)
             (resolve-collisions output dirs files)))))))
 
+  (define (union-of-directories output dirs)
+    ;; Create a new directory where we will merge the input directories.
+    (mkdir output)
+
+    ;; Build a hash table mapping each file to a list of input
+    ;; directories containing that file.
+    (let ((table (make-hash-table)))
+
+      (define (add-to-table! file dir)
+        (hash-set! table file (cons dir (hash-ref table file '()))))
+
+      ;; Populate the table.
+      (for-each (lambda (dir)
+                  (for-each (cut add-to-table! <> dir)
+                            (files-in-directory dir)))
+                dirs)
+
+      ;; Now iterate over the table and recursively
+      ;; perform a union for each entry.
+      (hash-for-each (lambda (file dirs-with-file)
+                       (union (string-append output "/" file)
+                              (map (cut string-append <> "/" file)
+                                   (reverse dirs-with-file))))
+                     table)))
+
   (setvbuf (current-output-port) _IOLBF)
   (setvbuf (current-error-port) _IOLBF)
   (when (file-port? log-port)
     (setvbuf log-port _IOLBF))
 
-  (union output (delete-duplicates inputs)))
+  (union-of-directories output (delete-duplicates inputs)))
 
 ;;; union.scm ends here
-- 
1.8.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] union: Ensure that the output is always a directory
  2014-04-03 22:04   ` [PATCH] union: Ensure that the output is always a directory Mark H Weaver
@ 2014-04-03 22:10     ` Ludovic Courtès
  0 siblings, 0 replies; 5+ messages in thread
From: Ludovic Courtès @ 2014-04-03 22:10 UTC (permalink / raw)
  To: Mark H Weaver; +Cc: guix-devel

Mark H Weaver <mhw@netris.org> skribis:

> From 6c8582bc41c8935b8e9d63146595e87d9b92fa95 Mon Sep 17 00:00:00 2001
> From: Mark H Weaver <mhw@netris.org>
> Date: Thu, 3 Apr 2014 17:49:20 -0400
> Subject: [PATCH] union: Ensure that the output is always a directory.
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Fixes the creation of single-package profiles, reported by Ludovic Courtès.
>
> * guix/build/union.scm (union-build): Add new internal procedure
>   'union-of-directories' that always creates a directory, containing the code
>   previously used only to merge multiple directories.  Call it from the
>   multiple-directory case in 'union' and from the top-level 'union-build'.

Looks good to me.

Can you run ‘make check TESTS=tests/guix-package.sh’, and push if it’s
OK?  (Actually that test was catching the problem.)

Thanks for the quick fix!

Ludo’.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-04-03 22:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-04-02 18:37 [PATCH] union: Rewrite to be faster; handle symlink/directory conflicts Mark H Weaver
2014-04-02 19:20 ` Ludovic Courtès
2014-04-03 20:15 ` Ludovic Courtès
2014-04-03 22:04   ` [PATCH] union: Ensure that the output is always a directory Mark H Weaver
2014-04-03 22:10     ` Ludovic Courtès

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).