unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
To: 49149@debbugs.gnu.org
Cc: Maxim Cournoyer <maxim.cournoyer@gmail.com>
Subject: [bug#49149] [PATCH 5/7] pack: Prevent duplicate files in tar archives.
Date: Mon, 21 Jun 2021 02:12:03 -0400	[thread overview]
Message-ID: <20210621061205.31878-6-maxim.cournoyer@gmail.com> (raw)
In-Reply-To: <20210621061205.31878-1-maxim.cournoyer@gmail.com>

Tar translate duplicate files in the archive into hard links.  These can cause
problems, as not every tool support them; for example dpkg doesn't.

* gnu/system/file-systems.scm (reduce-directories): New procedure.
(file-prefix?): Lift the restriction on file prefix.  The procedure can be
useful for comparing relative file names.  Adjust doc.
(file-name-depth): New procedure, extracted from ...
(btrfs-store-subvolume-file-name): ... here.
* guix/scripts/pack.scm (self-contained-tarball/builder): Use
reduce-directories.
* tests/file-systems.scm ("reduce-directories"): New test.
---
 gnu/system/file-systems.scm | 56 +++++++++++++++++++++++++------------
 guix/scripts/pack.scm       |  6 ++--
 tests/file-systems.scm      |  7 ++++-
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/gnu/system/file-systems.scm b/gnu/system/file-systems.scm
index 464e87cb18..fb87bfc85b 100644
--- a/gnu/system/file-systems.scm
+++ b/gnu/system/file-systems.scm
@@ -55,6 +55,7 @@
             file-system-dependencies
             file-system-location
 
+            reduce-directories
             file-system-type-predicate
             btrfs-subvolume?
             btrfs-store-subvolume-file-name
@@ -231,8 +232,8 @@
   (char-set-complement (char-set #\/)))
 
 (define (file-prefix? file1 file2)
-  "Return #t if FILE1 denotes the name of a file that is a parent of FILE2,
-where both FILE1 and FILE2 are absolute file name.  For example:
+  "Return #t if FILE1 denotes the name of a file that is a parent of FILE2.
+For example:
 
   (file-prefix? \"/gnu\" \"/gnu/store\")
   => #t
@@ -240,19 +241,41 @@ where both FILE1 and FILE2 are absolute file name.  For example:
   (file-prefix? \"/gn\" \"/gnu/store\")
   => #f
 "
-  (and (string-prefix? "/" file1)
-       (string-prefix? "/" file2)
-       (let loop ((file1 (string-tokenize file1 %not-slash))
-                  (file2 (string-tokenize file2 %not-slash)))
-         (match file1
-           (()
-            #t)
-           ((head1 tail1 ...)
-            (match file2
-              ((head2 tail2 ...)
-               (and (string=? head1 head2) (loop tail1 tail2)))
-              (()
-               #f)))))))
+  (let loop ((file1 (string-tokenize file1 %not-slash))
+             (file2 (string-tokenize file2 %not-slash)))
+    (match file1
+      (()
+       #t)
+      ((head1 tail1 ...)
+       (match file2
+         ((head2 tail2 ...)
+          (and (string=? head1 head2) (loop tail1 tail2)))
+         (()
+          #f))))))
+
+(define (file-name-depth file-name)
+  (length (string-tokenize file-name %not-slash)))
+
+(define (reduce-directories file-names)
+  "Eliminate entries in FILE-NAMES that are children of other entries in
+FILE-NAMES.  This is for example useful when passing a list of files to GNU
+tar, which would otherwise descend into each directory passed and archive the
+duplicate files as hard links, which can be undesirable."
+  (let* ((file-names/sorted
+          ;; Ascending sort by file hierarchy depth, then by file name length.
+          (stable-sort (delete-duplicates file-names)
+                       (lambda (f1 f2)
+                         (let ((depth1 (file-name-depth f1))
+                               (depth2 (file-name-depth f2)))
+                           (if (= depth1 depth2)
+                               (string< f1 f2)
+                               (< depth1 depth2)))))))
+    (reverse (fold (lambda (file-name results)
+                     (if (find (cut file-prefix? <> file-name) results)
+                         results        ;parent found -- skipping
+                         (cons file-name results)))
+                   '()
+                   file-names/sorted))))
 
 (define* (file-system-device->string device #:key uuid-type)
   "Return the string representations of the DEVICE field of a <file-system>
@@ -624,9 +647,6 @@ store is located, else #f."
         s
         (string-append "/" s)))
 
-  (define (file-name-depth file-name)
-    (length (string-tokenize file-name %not-slash)))
-
   (and-let* ((btrfs-subvolume-fs (filter btrfs-subvolume? file-systems))
              (btrfs-subvolume-fs*
               (sort btrfs-subvolume-fs
diff --git a/guix/scripts/pack.scm b/guix/scripts/pack.scm
index 9d4bb9f497..8a108b7a1a 100644
--- a/guix/scripts/pack.scm
+++ b/guix/scripts/pack.scm
@@ -225,13 +225,15 @@ dependencies are registered."
                           `((guix build pack)
                             (guix build utils)
                             (guix build union)
-                            (gnu build install))
+                            (gnu build install)
+                            (gnu system file-systems))
                           #:select? import-module?)
     #~(begin
         (use-modules (guix build pack)
                      (guix build utils)
                      ((guix build union) #:select (relative-file-name))
                      (gnu build install)
+                     ((gnu system file-systems) #:select (reduce-directories))
                      (srfi srfi-1)
                      (srfi srfi-26)
                      (ice-9 match))
@@ -298,7 +300,7 @@ dependencies are registered."
 
                    ,(string-append "." (%store-directory))
 
-                   ,@(delete-duplicates
+                   ,@(reduce-directories
                       (filter-map (match-lambda
                                     (('directory directory)
                                      (string-append "." directory))
diff --git a/tests/file-systems.scm b/tests/file-systems.scm
index 7f7c373884..80acb6d5b9 100644
--- a/tests/file-systems.scm
+++ b/tests/file-systems.scm
@@ -1,6 +1,6 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2015, 2017 Ludovic Courtès <ludo@gnu.org>
-;;; Copyright © 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com>
+;;; Copyright © 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -50,6 +50,11 @@
                    (device "/foo")
                    (flags '(bind-mount read-only)))))))))
 
+(test-equal "reduce-directories"
+  '("./opt/gnu/" "./opt/gnuism" "a/b/c")
+  (reduce-directories '("./opt/gnu/etc" "./opt/gnu/" "./opt/gnu/bin"
+                        "./opt/gnu/lib/debug" "./opt/gnuism" "a/b/c" "a/b/c")))
+
 (test-assert "does not pull (guix config)"
   ;; This module is meant both for the host side and "build side", so make
   ;; sure it doesn't pull in (guix config), which depends on the user's
-- 
2.32.0





  parent reply	other threads:[~2021-06-21  6:13 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-21  6:10 [bug#49149] [PATCH 0/7] Add deb format for guix pack Maxim Cournoyer
2021-06-21  6:11 ` [bug#49149] [PATCH 1/7] pack: Extract builder code from self-contained-tarball Maxim Cournoyer
2021-06-21  6:11   ` [bug#49149] [PATCH] tentatively reuse rlib for cargo-build-system Maxim Cournoyer
2021-06-21 20:28     ` Maxim Cournoyer
2021-06-21  6:12   ` [bug#49149] [PATCH 2/7] pack: Factorize base tar options Maxim Cournoyer
2021-06-21  6:12   ` [bug#49149] [PATCH 3/7] pack: Fix typo Maxim Cournoyer
2021-06-21  6:12   ` [bug#49149] [PATCH 4/7] pack: Improve naming of the packs store file names Maxim Cournoyer
2021-06-21 18:11     ` Maxime Devos
2021-06-22 14:03       ` Maxim Cournoyer
2021-06-23 10:22         ` Maxime Devos
2021-06-24  4:40           ` [bug#49149] [PATCH v2 1/7] pack: Extract builder code from self-contained-tarball Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 2/7] pack: Factorize base tar options Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 3/7] pack: Fix typo Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 4/7] pack: Improve naming of the packs store file names Maxim Cournoyer
2021-06-26  5:03               ` [bug#49149] [PATCH 0/7] Add deb format for guix pack Maxim Cournoyer
2021-06-30 10:13               ` Ludovic Courtès
2021-06-30 18:36                 ` Maxim Cournoyer
2021-07-01 13:26                   ` Ludovic Courtès
2021-07-04  3:21                     ` Maxim Cournoyer
2021-07-05 16:14                       ` Ludovic Courtès
2021-07-05 20:42                         ` Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 5/7] pack: Prevent duplicate files in tar archives Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 6/7] tests: pack: Fix compressor extension Maxim Cournoyer
2021-06-24  4:40             ` [bug#49149] [PATCH v2 7/7] pack: Add support for the deb format Maxim Cournoyer
2021-06-26 16:58               ` Maxime Devos
2021-06-29 19:20                 ` bug#49149: [PATCH 0/7] Add deb format for guix pack Maxim Cournoyer
2021-06-30 10:10               ` [bug#49149] " Ludovic Courtès
2021-06-24  4:44           ` [bug#49149] [PATCH 4/7] pack: Improve naming of the packs store file names Maxim Cournoyer
2021-06-23 21:16       ` [bug#49149] [PATCH 0/7] Add deb format for guix pack Ludovic Courtès
2021-06-21  6:12   ` Maxim Cournoyer [this message]
2021-06-30 10:06     ` Ludovic Courtès
2021-06-30 18:16       ` Maxim Cournoyer
2021-07-01 13:24         ` Ludovic Courtès
2021-06-21  6:12   ` [bug#49149] [PATCH 6/7] tests: pack: Fix compressor extension Maxim Cournoyer
2021-06-21  6:12   ` [bug#49149] [PATCH 7/7] pack: Add support for the deb format Maxim Cournoyer
2021-06-21 16:44 ` [bug#49149] Add deb format for guix pack jgart via Guix-patches via
2021-06-23 21:28 ` [bug#49149] [PATCH 0/7] " Ludovic Courtès
2021-06-29 17:49   ` Maxim Cournoyer
2021-06-30  9:15     ` Ludovic Courtès
2021-06-30 13:49       ` zimoun
2021-06-30 15:06         ` zimoun
2021-06-30 16:55           ` Maxim Cournoyer
2021-06-30 16:54         ` Maxim Cournoyer
2021-06-30 17:28         ` Maxim Cournoyer
2021-06-30 17:36           ` Maxim Cournoyer
2021-06-30 17:47           ` zimoun
2021-06-30 19:20             ` Maxim Cournoyer
2021-07-01 13:08               ` zimoun
2021-06-30 16:42       ` Maxim Cournoyer
2021-07-01 13:20         ` Ludovic Courtès
2021-07-01 13:52           ` zimoun
2021-07-05 16:17             ` Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://guix.gnu.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210621061205.31878-6-maxim.cournoyer@gmail.com \
    --to=maxim.cournoyer@gmail.com \
    --cc=49149@debbugs.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).