;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2017 Caleb Ristvedt ;;; Copyright © 2018, 2019, 2020 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . ;;; This houses stuff we do to files when they arrive at the store - resetting ;;; timestamps, deduplicating, etc. (define-module (guix store deduplication) #:use-module (gcrypt hash) #:use-module (guix build utils) #:use-module (guix build syscalls) #:use-module (guix base32) #:use-module (srfi srfi-11) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) #:use-module (ice-9 match) #:use-module (guix serialization) #:export (nar-sha256 deduplicate)) ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where ;; 'port-position' throws to 'out-of-range' when the offset is great than or ;; equal to 2^32: . (define (counting-wrapper-port output-port) "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to retrieve the number of bytes written to OUTPUT-PORT." (let ((byte-count 0)) (values (make-custom-binary-output-port "counting-wrapper" (lambda (bytes offset count) (put-bytevector output-port bytes offset count) (set! byte-count (+ byte-count count)) count) (lambda () byte-count) #f (lambda () (close-port output-port))) (lambda () byte-count)))) (define (nar-sha256 file) "Gives the sha256 hash of a file and the size of the file in nar form." (let*-values (((port get-hash) (open-sha256-port)) ((wrapper get-size) (counting-wrapper-port port))) (write-file file wrapper) (force-output wrapper) (force-output port) (let ((hash (get-hash)) (size (get-size))) (close-port wrapper) (values hash size)))) (define (tempname-in directory) "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be unused by the time you create anything with that name, but a good shot." (let ((const-part (string-append directory "/.tmp-link-" (number->string (getpid))))) (let try ((guess-part (number->string (random most-positive-fixnum) 16))) (if (file-exists? (string-append const-part "-" guess-part)) (try (number->string (random most-positive-fixnum) 16)) (string-append const-part "-" guess-part))))) (define* (get-temp-link target #:optional (link-prefix (dirname target))) "Like mkstemp!, but instead of creating a new file and giving you the name, it creates a new hardlink to TARGET and gives you the name. Since cross-file-system hardlinks don't work, the temp link must be created on the same file system - where in that file system it is can be controlled by LINK-PREFIX." (let try ((tempname (tempname-in link-prefix))) (catch 'system-error (lambda () (link target tempname) tempname) (lambda args (if (= (system-error-errno args) EEXIST) (try (tempname-in link-prefix)) (apply throw args)))))) (define (call-with-writable-file file store thunk) (if (string=? file store) (thunk) ;don't meddle with the store's permissions (let ((stat (lstat file))) (dynamic-wind (lambda () (make-file-writable file)) thunk (lambda () (set-file-time file stat) (chmod file (stat:mode stat))))))) (define-syntax-rule (with-writable-file file store exp ...) "Make FILE writable for the dynamic extent of EXP..., except if FILE is the store." (call-with-writable-file file store (lambda () exp ...))) ;; There are 3 main kinds of errors we can get from hardlinking: "Too many ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and ;; "can't fit more stuff in this directory" (ENOSPC). (define* (canonicalize-with-link target to-replace #:key (swap-directory (dirname target)) (store (%store-directory))) "Atomically replace the file TO-REPLACE with a link to TARGET. Use SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC and EMLINK, TO-REPLACE is left unchanged. Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system." (define temp-link (catch 'system-error (lambda () (let retry () (if (file-exists? target) (catch 'system-error (lambda () (get-temp-link target swap-directory)) (lambda args (let ((errno (system-error-errno args))) (if (= errno ENOENT) ;; either SWAP-DIRECTORY has missing directory ;; components or TARGET was deleted - this is a ;; fundamental ambiguity to the errno produced by ;; link() (if (file-exists? swap-directory) ;; we must assume link failed because target doesn't ;; exist, so create it. (retry) (apply throw args)) (apply throw args))))) (catch 'system-error (lambda () (link to-replace target) #f) (lambda args (if (= (system-error-errno args) EEXIST) (retry) (apply throw args))))))) (lambda args ;; We get ENOSPC when we can't fit an additional entry in ;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its ;; maximum number of links. (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK)) #f (apply throw args))))) ;; If we couldn't create TEMP-LINK, that's OK: just don't do the ;; replacement, which means TO-REPLACE won't be deduplicated. (when temp-link (with-writable-file (dirname to-replace) store (catch 'system-error (lambda () (rename-file temp-link to-replace)) (lambda args (delete-file temp-link) (unless (= EMLINK (system-error-errno args)) (apply throw args))))))) (define* (deduplicate path hash #:key (store (%store-directory))) "Check if a store item with sha256 hash HASH already exists. If so, replace PATH with a hardlink to the already-existing one. If not, register PATH so that future duplicates can hardlink to it. PATH is assumed to be under STORE." (define links-directory (string-append store "/.links")) (mkdir-p links-directory) (let loop ((path path) (type (stat:type (lstat path))) (hash hash)) (if (eq? 'directory type) ;; Can't hardlink directories, so hardlink their atoms. (for-each (match-lambda ((file . properties) (unless (member file '("." "..")) (let* ((file (string-append path "/" file)) (type (match (assoc-ref properties 'type) ((or 'unknown #f) (stat:type (lstat file))) (type type)))) (loop file type (and (not (eq? 'directory type)) (nar-sha256 file))))))) (scandir* path)) (let ((link-file (string-append links-directory "/" (bytevector->nix-base32-string hash)))) (canonicalize-with-link link-file path #:swap-directory links-directory #:store store)))))