unofficial mirror of bug-guix@gnu.org 
 help / color / mirror / code / Atom feed
From: "Ludovic Courtès" <ludo@gnu.org>
To: 44760@debbugs.gnu.org
Subject: bug#44760: [PATCH 15/15] deduplicate: Create the '.links' directory lazily.
Date: Fri, 11 Dec 2020 16:09:51 +0100	[thread overview]
Message-ID: <20201211150951.18508-6-ludo@gnu.org> (raw)
In-Reply-To: <20201211150951.18508-1-ludo@gnu.org>

This avoids repeated (mkdir-p "/gnu/store/.links") calls when
deduplicating lots of files.

* guix/store/deduplication.scm (deduplicate): Remove initial call to
'mkdir-p'.  Add ENOENT case in 'link' exception handler.  Reindent.
* tests/store-deduplication.scm ("deduplicate, ENOSPC"): Check
for (<= links 4) to account for the initial 'link' call.
---
 guix/store/deduplication.scm  | 96 ++++++++++++++++++-----------------
 tests/store-deduplication.scm |  2 +-
 2 files changed, 51 insertions(+), 47 deletions(-)

diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index 8564f12107..a72a43bf79 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -159,52 +159,56 @@ under STORE."
   (define links-directory
     (string-append store "/.links"))
 
-    (mkdir-p links-directory)
-    (let loop ((path path)
-               (type (stat:type (lstat path)))
-               (hash hash))
-      (if (eq? 'directory type)
-          ;; Can't hardlink directories, so hardlink their atoms.
-          (for-each (match-lambda
-                      ((file . properties)
-                       (unless (member file '("." ".."))
-                         (let* ((file (string-append path "/" file))
-                                (type (match (assoc-ref properties 'type)
-                                        ((or 'unknown #f)
-                                         (stat:type (lstat file)))
-                                        (type type))))
-                           (loop file type
-                                 (and (not (eq? 'directory type))
-                                      (nar-sha256 file)))))))
-                    (scandir* path))
-          (let ((link-file (string-append links-directory "/"
-                                          (bytevector->nix-base32-string hash))))
-            (if (file-exists? link-file)
-                (replace-with-link link-file path
-                                   #:swap-directory links-directory
-                                   #:store store)
-                (catch 'system-error
-                  (lambda ()
-                    (link path link-file))
-                  (lambda args
-                    (let ((errno (system-error-errno args)))
-                      (cond ((= errno EEXIST)
-                             ;; Someone else put an entry for PATH in
-                             ;; LINKS-DIRECTORY before we could.  Let's use it.
-                             (replace-with-link path link-file
-                                                #:swap-directory
-                                                links-directory
-                                                #:store store))
-                            ((= errno ENOSPC)
-                             ;; There's not enough room in the directory index for
-                             ;; more entries in .links, but that's fine: we can
-                             ;; just stop.
-                             #f)
-                            ((= errno EMLINK)
-                             ;; PATH has reached the maximum number of links, but
-                             ;; that's OK: we just can't deduplicate it more.
-                             #f)
-                            (else (apply throw args)))))))))))
+  (let loop ((path path)
+             (type (stat:type (lstat path)))
+             (hash hash))
+    (if (eq? 'directory type)
+        ;; Can't hardlink directories, so hardlink their atoms.
+        (for-each (match-lambda
+                    ((file . properties)
+                     (unless (member file '("." ".."))
+                       (let* ((file (string-append path "/" file))
+                              (type (match (assoc-ref properties 'type)
+                                      ((or 'unknown #f)
+                                       (stat:type (lstat file)))
+                                      (type type))))
+                         (loop file type
+                               (and (not (eq? 'directory type))
+                                    (nar-sha256 file)))))))
+                  (scandir* path))
+        (let ((link-file (string-append links-directory "/"
+                                        (bytevector->nix-base32-string hash))))
+          (if (file-exists? link-file)
+              (replace-with-link link-file path
+                                 #:swap-directory links-directory
+                                 #:store store)
+              (catch 'system-error
+                (lambda ()
+                  (link path link-file))
+                (lambda args
+                  (let ((errno (system-error-errno args)))
+                    (cond ((= errno EEXIST)
+                           ;; Someone else put an entry for PATH in
+                           ;; LINKS-DIRECTORY before we could.  Let's use it.
+                           (replace-with-link path link-file
+                                              #:swap-directory
+                                              links-directory
+                                              #:store store))
+                          ((= errno ENOENT)
+                           ;; This most likely means that LINKS-DIRECTORY does
+                           ;; not exist.  Attempt to create it and try again.
+                           (mkdir-p links-directory)
+                           (loop path type hash))
+                          ((= errno ENOSPC)
+                           ;; There's not enough room in the directory index for
+                           ;; more entries in .links, but that's fine: we can
+                           ;; just stop.
+                           #f)
+                          ((= errno EMLINK)
+                           ;; PATH has reached the maximum number of links, but
+                           ;; that's OK: we just can't deduplicate it more.
+                           #f)
+                          (else (apply throw args)))))))))))
 
 (define (tee input len output)
   "Return a port that reads up to LEN bytes from INPUT and writes them to
diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm
index 7b01acae24..b1c2d93bbd 100644
--- a/tests/store-deduplication.scm
+++ b/tests/store-deduplication.scm
@@ -95,7 +95,7 @@
          (lambda ()
            (set! link (lambda (old new)
                         (set! links (+ links 1))
-                        (if (<= links 3)
+                        (if (<= links 4)
                             (true-link old new)
                             (throw 'system-error "link" "~A" '("Whaaat?!")
                                    (list ENOSPC))))))
-- 
2.29.2





  parent reply	other threads:[~2020-12-11 15:12 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-20 11:02 bug#44760: Closure copy in ‘guix system init’ is inefficient Ludovic Courtès
2020-11-22 19:46 ` raingloom
2020-11-22 21:10   ` Ludovic Courtès
2020-12-11 15:09 ` bug#44760: [PATCH 00/15] Speed up 'guix system init' & co Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 01/15] serialization: 'fold-archive' notifies about directory processing completion Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 02/15] serialization: 'restore-file' sets canonical timestamp and permissions Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 03/15] nar: Deduplicate files right as they are restored Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 04/15] store-copy: 'populate-store' resets timestamps Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 05/15] image: 'register-closure' assumes already-reset timestamps Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 06/15] database: Remove #:reset-timestamps? from 'register-items' Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 07/15] store-copy: 'populate-store' can optionally deduplicate files Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 08/15] image: 'register-closure' leaves it up to the caller to deduplicate Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 09/15] database: Remove #:deduplicate? from 'register-items' Ludovic Courtès
2020-12-15 16:33   ` bug#44760: [PATCH 00/15] Speed up 'guix system init' & co Ludovic Courtès
2020-12-11 15:09 ` bug#44760: [PATCH 10/15] guix system: 'init' copies, resets timestamps, and deduplicates at once Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 11/15] database: Remove #:deduplicate? and #:reset-timestamps? from 'register-path' Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 12/15] system: 'init' does not recompute the hash of each store item Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 13/15] database: Remove 'register-path' Ludovic Courtès
2020-12-11 15:09   ` bug#44760: [PATCH 14/15] database: Honor 'SOURCE_DATE_EPOCH' Ludovic Courtès
2020-12-11 15:09   ` Ludovic Courtès [this message]
2020-12-15 16:38 ` bug#44760: Closure copy in ‘guix system init’ is inefficient Ludovic Courtès
2020-12-16 21:53 ` Jonathan Brielmaier
2020-12-17 13:24   ` Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://guix.gnu.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201211150951.18508-6-ludo@gnu.org \
    --to=ludo@gnu.org \
    --cc=44760@debbugs.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).