unofficial mirror of guix-patches@gnu.org 
 help / color / mirror / code / Atom feed
blob 035a4218fb1c236defab977ec8e46424f0588771 9062 bytes (raw)
name: guix/store/deduplication.scm 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
 
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

;;; This houses stuff we do to files when they arrive at the store - resetting
;;; timestamps, deduplicating, etc.

(define-module (guix store deduplication)
  #:use-module (gcrypt hash)
  #:use-module (guix build utils)
  #:use-module (guix build syscalls)
  #:use-module (guix base32)
  #:use-module (srfi srfi-11)
  #:use-module (rnrs io ports)
  #:use-module (ice-9 ftw)
  #:use-module (ice-9 match)
  #:use-module (guix serialization)
  #:export (nar-sha256
            deduplicate))

;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
;; 'port-position' throws to 'out-of-range' when the offset is great than or
;; equal to 2^32: <https://bugs.gnu.org/32161>.
(define (counting-wrapper-port output-port)
  "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
retrieve the number of bytes written to OUTPUT-PORT."
  (let ((byte-count 0))
    (values (make-custom-binary-output-port "counting-wrapper"
                                            (lambda (bytes offset count)
                                              (put-bytevector output-port bytes
                                                              offset count)
                                              (set! byte-count
                                                (+ byte-count count))
                                              count)
                                            (lambda ()
                                              byte-count)
                                            #f
                                            (lambda ()
                                              (close-port output-port)))
            (lambda ()
              byte-count))))

(define (nar-sha256 file)
  "Gives the sha256 hash of a file and the size of the file in nar form."
  (let*-values (((port get-hash) (open-sha256-port))
                ((wrapper get-size) (counting-wrapper-port port)))
    (write-file file wrapper)
    (force-output wrapper)
    (force-output port)
    (let ((hash (get-hash))
          (size (get-size)))
      (close-port wrapper)
      (values hash size))))

(define (tempname-in directory)
  "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
unused by the time you create anything with that name, but a good shot."
  (let ((const-part (string-append directory "/.tmp-link-"
                                   (number->string (getpid)))))
    (let try ((guess-part
               (number->string (random most-positive-fixnum) 16)))
      (if (file-exists? (string-append const-part "-" guess-part))
          (try (number->string (random most-positive-fixnum) 16))
          (string-append const-part "-" guess-part)))))

(define* (get-temp-link target #:optional (link-prefix (dirname target)))
  "Like mkstemp!, but instead of creating a new file and giving you the name,
it creates a new hardlink to TARGET and gives you the name. Since
cross-file-system hardlinks don't work, the temp link must be created on the
same file system - where in that file system it is can be controlled by
LINK-PREFIX."
  (let try ((tempname (tempname-in link-prefix)))
    (catch 'system-error
      (lambda ()
        (link target tempname)
        tempname)
      (lambda args
        (if (= (system-error-errno args) EEXIST)
            (try (tempname-in link-prefix))
            (apply throw args))))))

(define (call-with-writable-file file store thunk)
  (if (string=? file store)
      (thunk)                       ;don't meddle with the store's permissions
      (let ((stat (lstat file)))
        (dynamic-wind
          (lambda ()
            (make-file-writable file))
          thunk
          (lambda ()
            (set-file-time file stat)
            (chmod file (stat:mode stat)))))))

(define-syntax-rule (with-writable-file file store exp ...)
  "Make FILE writable for the dynamic extent of EXP..., except if FILE is the
store."
  (call-with-writable-file file store (lambda () exp ...)))

;; There are 3 main kinds of errors we can get from hardlinking: "Too many
;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
;; "can't fit more stuff in this directory" (ENOSPC).

(define* (canonicalize-with-link target to-replace
                                 #:key (swap-directory (dirname target))
                                 (store (%store-directory)))
  "Atomically replace the file TO-REPLACE with a link to TARGET.  Use
SWAP-DIRECTORY as the directory to store temporary hard links.  Upon ENOSPC
and EMLINK, TO-REPLACE is left unchanged.

Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
  (define temp-link
    (catch 'system-error
      (lambda ()
        (let retry ()
          (if (file-exists? target)
              (catch 'system-error
                (lambda ()
                  (get-temp-link target swap-directory))
                (lambda args
                  (let ((errno (system-error-errno args)))
                    (if (= errno ENOENT)
                        ;; either SWAP-DIRECTORY has missing directory
                        ;; components or TARGET was deleted - this is a
                        ;; fundamental ambiguity to the errno produced by
                        ;; link()
                        (if (file-exists? swap-directory)
                            ;; we must assume link failed because target doesn't
                            ;; exist, so create it.
                            (retry)
                            (apply throw args))
                        (apply throw args)))))
              (catch 'system-error
                (lambda ()
                  (link to-replace target)
                  #f)
                (lambda args
                  (if (= (system-error-errno args) EEXIST)
                      (retry)
                      (apply throw args)))))))
      (lambda args
        ;; We get ENOSPC when we can't fit an additional entry in
        ;; SWAP-DIRECTORY.  If it's EMLINK, then TARGET has reached its
        ;; maximum number of links.
        (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
            #f
            (apply throw args)))))

  ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
  ;; replacement, which means TO-REPLACE won't be deduplicated.
  (when temp-link
    (with-writable-file (dirname to-replace) store
      (catch 'system-error
        (lambda ()
          (rename-file temp-link to-replace))
        (lambda args
          (delete-file temp-link)
          (unless (= EMLINK (system-error-errno args))
            (apply throw args)))))))

(define* (deduplicate path hash #:key (store (%store-directory)))
  "Check if a store item with sha256 hash HASH already exists.  If so,
replace PATH with a hardlink to the already-existing one.  If not, register
PATH so that future duplicates can hardlink to it.  PATH is assumed to be
under STORE."
  (define links-directory
    (string-append store "/.links"))

  (mkdir-p links-directory)
  (let loop ((path path)
             (type (stat:type (lstat path)))
             (hash hash))
    (if (eq? 'directory type)
        ;; Can't hardlink directories, so hardlink their atoms.
        (for-each (match-lambda
                    ((file . properties)
                     (unless (member file '("." ".."))
                       (let* ((file (string-append path "/" file))
                              (type (match (assoc-ref properties 'type)
                                      ((or 'unknown #f)
                                       (stat:type (lstat file)))
                                      (type type))))
                         (loop file type
                               (and (not (eq? 'directory type))
                                    (nar-sha256 file)))))))
                  (scandir* path))
        (let ((link-file (string-append links-directory "/"
                                        (bytevector->nix-base32-string
                                         hash))))
          (canonicalize-with-link link-file path
                                  #:swap-directory links-directory
                                  #:store store)))))

debug log:

solving 035a4218fb ...
found 035a4218fb in https://yhetil.org/guix-patches/87bljka234.fsf@cune.org/
found 0655ceb890 in https://git.savannah.gnu.org/cgit/guix.git
preparing index
index prepared:
100644 0655ceb890caeb678334f2f76230b9e2898acaef	guix/store/deduplication.scm

applying [1/1] https://yhetil.org/guix-patches/87bljka234.fsf@cune.org/
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index 0655ceb890..035a4218fb 100644

Checking patch guix/store/deduplication.scm...
Applied patch guix/store/deduplication.scm cleanly.

index at:
100644 035a4218fb1c236defab977ec8e46424f0588771	guix/store/deduplication.scm

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).