* [bug#31618] [PATCH 2/4] Add (gnu store database).
2018-05-28 10:36 ` [bug#31618] [PATCH 1/4] build: Check for Guile-SQLite3 Ludovic Courtès
@ 2018-05-28 10:36 ` Ludovic Courtès
2018-05-28 10:36 ` [bug#31618] [PATCH 3/4] database: 'register-path' resets timestamps Ludovic Courtès
2018-05-28 10:36 ` [bug#31618] [PATCH 4/4] Add (guix store deduplication) Ludovic Courtès
2 siblings, 0 replies; 6+ messages in thread
From: Ludovic Courtès @ 2018-05-28 10:36 UTC (permalink / raw)
To: 31618; +Cc: Caleb Ristvedt
From: Caleb Ristvedt <caleb.ristvedt@cune.org>
* guix/config.scm.in (%store-database-directory): New variable.
* guix/store/database.scm: New file.
* tests/store-database.scm: New file.
* Makefile.am (STORE_MODULES): New variable.
(MODULES, MODULES_NOT_COMPILED): Adjust accordingly.
(SCM_TESTS) [HAVE_GUILE_SQLITE3]: Add tests/store-database.scm.
Co-authored-by: Ludovic Courtès <ludo@gnu.org>
---
.dir-locals.el | 2 +
Makefile.am | 17 +++
guix/config.scm.in | 6 +
guix/store/database.scm | 234 +++++++++++++++++++++++++++++++++++++++
tests/store-database.scm | 54 +++++++++
5 files changed, 313 insertions(+)
create mode 100644 guix/store/database.scm
create mode 100644 tests/store-database.scm
diff --git a/.dir-locals.el b/.dir-locals.el
index dac6cb145..a993cbcf8 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -74,6 +74,8 @@
(eval . (put 'wrap-program 'scheme-indent-function 1))
(eval . (put 'with-imported-modules 'scheme-indent-function 1))
+ (eval . (put 'with-database 'scheme-indent-function 2))
+
(eval . (put 'call-with-container 'scheme-indent-function 1))
(eval . (put 'container-excursion 'scheme-indent-function 1))
(eval . (put 'eventually 'scheme-indent-function 1))
diff --git a/Makefile.am b/Makefile.am
index 2a0a85842..d81fce558 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -257,6 +257,16 @@ MODULES += \
endif BUILD_DAEMON_OFFLOAD
+# Scheme implementation of the build daemon and related functionality.
+STORE_MODULES = \
+ guix/store/database.scm
+
+if HAVE_GUILE_SQLITE3
+MODULES += $(STORE_MODULES)
+else
+MODULES_NOT_COMPILED += $(STORE_MODULES)
+endif !HAVE_GUILE_SQLITE3
+
# Internal modules with test suite support.
dist_noinst_DATA = guix/tests.scm guix/tests/http.scm
@@ -379,6 +389,13 @@ SCM_TESTS += \
endif
+if HAVE_GUILE_SQLITE3
+
+SCM_TESTS += \
+ tests/store-database.scm
+
+endif
+
SH_TESTS = \
tests/guix-build.sh \
tests/guix-download.sh \
diff --git a/guix/config.scm.in b/guix/config.scm.in
index 8f2c4abd8..dfe5fe0db 100644
--- a/guix/config.scm.in
+++ b/guix/config.scm.in
@@ -1,5 +1,6 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2013, 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -29,6 +30,7 @@
%store-directory
%state-directory
+ %store-database-directory
%config-directory
%guix-register-program
@@ -80,6 +82,10 @@
(or (getenv "NIX_STATE_DIR")
(string-append %localstatedir "/guix")))
+(define %store-database-directory
+ (or (and=> (getenv "NIX_DB_DIR") canonicalize-path)
+ (string-append %state-directory "/db")))
+
(define %config-directory
;; This must match `GUIX_CONFIGURATION_DIRECTORY' as defined in `nix/local.mk'.
(or (getenv "GUIX_CONFIGURATION_DIRECTORY")
diff --git a/guix/store/database.scm b/guix/store/database.scm
new file mode 100644
index 000000000..4233219ba
--- /dev/null
+++ b/guix/store/database.scm
@@ -0,0 +1,234 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
+;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix store database)
+ #:use-module (sqlite3)
+ #:use-module (guix config)
+ #:use-module (guix serialization)
+ #:use-module (guix base16)
+ #:use-module (guix hash)
+ #:use-module (rnrs io ports)
+ #:use-module (srfi srfi-11)
+ #:use-module (srfi srfi-19)
+ #:use-module (ice-9 match)
+ #:export (sqlite-register
+ register-path))
+
+;;; Code for working with the store database directly.
+
+
+(define-syntax-rule (with-database file db exp ...)
+ "Open DB from FILE and close it when the dynamic extent of EXP... is left."
+ (let ((db (sqlite-open file)))
+ (dynamic-wind noop
+ (lambda ()
+ exp ...)
+ (lambda ()
+ (sqlite-close db)))))
+
+(define (last-insert-row-id db)
+ ;; XXX: (sqlite3) currently lacks bindings for 'sqlite3_last_insert_rowid'.
+ ;; Work around that.
+ (let* ((stmt (sqlite-prepare db "SELECT last_insert_rowid();"
+ #:cache? #t))
+ (result (sqlite-fold cons '() stmt)))
+ (sqlite-finalize stmt)
+ (match result
+ ((#(id)) id)
+ (_ #f))))
+
+(define path-id-sql
+ "SELECT id FROM ValidPaths WHERE path = :path")
+
+(define* (path-id db path)
+ "If PATH exists in the 'ValidPaths' table, return its numerical
+identifier. Otherwise, return #f."
+ (let ((stmt (sqlite-prepare db path-id-sql #:cache? #t)))
+ (sqlite-bind-arguments stmt #:path path)
+ (let ((result (sqlite-fold cons '() stmt)))
+ (sqlite-finalize stmt)
+ (match result
+ ((#(id) . _) id)
+ (_ #f)))))
+
+(define update-sql
+ "UPDATE ValidPaths SET hash = :hash, registrationTime = :time, deriver =
+:deriver, narSize = :size WHERE id = :id")
+
+(define insert-sql
+ "INSERT INTO ValidPaths (path, hash, registrationTime, deriver, narSize)
+VALUES (:path, :hash, :time, :deriver, :size)")
+
+(define* (update-or-insert db #:key path deriver hash nar-size time)
+ "The classic update-if-exists and insert-if-doesn't feature that sqlite
+doesn't exactly have... they've got something close, but it involves deleting
+and re-inserting instead of updating, which causes problems with foreign keys,
+of course. Returns the row id of the row that was modified or inserted."
+ (let ((id (path-id db path)))
+ (if id
+ (let ((stmt (sqlite-prepare db update-sql #:cache? #t)))
+ (sqlite-bind-arguments stmt #:id id
+ #:path path #:deriver deriver
+ #:hash hash #:size nar-size #:time time)
+ (sqlite-fold cons '() stmt)
+ (sqlite-finalize stmt)
+ (last-insert-row-id db))
+ (let ((stmt (sqlite-prepare db insert-sql #:cache? #t)))
+ (sqlite-bind-arguments stmt
+ #:path path #:deriver deriver
+ #:hash hash #:size nar-size #:time time)
+ (sqlite-fold cons '() stmt) ;execute it
+ (sqlite-finalize stmt)
+ (last-insert-row-id db)))))
+
+(define add-reference-sql
+ "INSERT OR IGNORE INTO Refs (referrer, reference) SELECT :referrer, id
+FROM ValidPaths WHERE path = :reference")
+
+(define (add-references db referrer references)
+ "REFERRER is the id of the referring store item, REFERENCES is a list
+containing store items being referred to. Note that all of the store items in
+REFERENCES must already be registered."
+ (let ((stmt (sqlite-prepare db add-reference-sql #:cache? #t)))
+ (for-each (lambda (reference)
+ (sqlite-reset stmt)
+ (sqlite-bind-arguments stmt #:referrer referrer
+ #:reference reference)
+ (sqlite-fold cons '() stmt) ;execute it
+ (sqlite-finalize stmt)
+ (last-insert-row-id db))
+ references)))
+
+;; XXX figure out caching of statement and database objects... later
+(define* (sqlite-register #:key db-file path (references '())
+ deriver hash nar-size)
+ "Registers this stuff in a database specified by DB-FILE. PATH is the string
+path of some store item, REFERENCES is a list of string paths which the store
+item PATH refers to (they need to be already registered!), DERIVER is a string
+path of the derivation that created the store item PATH, HASH is the
+base16-encoded sha256 hash of the store item denoted by PATH (prefixed with
+\"sha256:\") after being converted to nar form, and nar-size is the size in
+bytes of the store item denoted by PATH after being converted to nar form."
+ (with-database db-file db
+ (let ((id (update-or-insert db #:path path
+ #:deriver deriver
+ #:hash hash
+ #:nar-size nar-size
+ #:time (time-second (current-time time-utc)))))
+ (add-references db id references))))
+
+\f
+;;;
+;;; High-level interface.
+;;;
+
+;; XXX: Would it be better to just make WRITE-FILE give size as well? I question
+;; the general utility of this approach.
+(define (counting-wrapper-port output-port)
+ "Some custom ports don't implement GET-POSITION at all. But if we want to
+figure out how many bytes are being written, we will want to use that. So this
+makes a wrapper around a port which implements GET-POSITION."
+ (let ((byte-count 0))
+ (make-custom-binary-output-port "counting-wrapper"
+ (lambda (bytes offset count)
+ (set! byte-count
+ (+ byte-count count))
+ (put-bytevector output-port bytes
+ offset count)
+ count)
+ (lambda ()
+ byte-count)
+ #f
+ (lambda ()
+ (close-port output-port)))))
+
+
+(define (nar-sha256 file)
+ "Gives the sha256 hash of a file and the size of the file in nar form."
+ (let-values (((port get-hash) (open-sha256-port)))
+ (let ((wrapper (counting-wrapper-port port)))
+ (write-file file wrapper)
+ (force-output wrapper)
+ (force-output port)
+ (let ((hash (get-hash))
+ (size (port-position wrapper)))
+ (close-port wrapper)
+ (values hash size)))))
+
+;; TODO: make this canonicalize store items that are registered. This involves
+;; setting permissions and timestamps, I think. Also, run a "deduplication
+;; pass", whatever that involves. Also, handle databases not existing yet
+;; (what should the default behavior be? Figuring out how the C++ stuff
+;; currently does it sounds like a lot of grepping for global
+;; variables...). Also, return #t on success like the documentation says we
+;; should.
+
+(define* (register-path path
+ #:key (references '()) deriver prefix
+ state-directory)
+ ;; Priority for options: first what is given, then environment variables,
+ ;; then defaults. %state-directory, %store-directory, and
+ ;; %store-database-directory already handle the "environment variables /
+ ;; defaults" question, so we only need to choose between what is given and
+ ;; those.
+ "Register PATH as a valid store file, with REFERENCES as its list of
+references, and DERIVER as its deriver (.drv that led to it.) If PREFIX is
+given, it must be the name of the directory containing the new store to
+initialize; if STATE-DIRECTORY is given, it must be a string containing the
+absolute file name to the state directory of the store being initialized.
+Return #t on success.
+
+Use with care as it directly modifies the store! This is primarily meant to
+be used internally by the daemon's build hook."
+ (let* ((db-dir (cond
+ (state-directory
+ (string-append state-directory "/db"))
+ (prefix
+ ;; If prefix is specified, the value of NIX_STATE_DIR
+ ;; (which affects %state-directory) isn't supposed to
+ ;; affect db-dir, only the compile-time-customized
+ ;; default should.
+ (string-append prefix %localstatedir "/guix/db"))
+ (else
+ %store-database-directory)))
+ (store-dir (if prefix
+ ;; same situation as above
+ (string-append prefix %storedir)
+ %store-directory))
+ (to-register (if prefix
+ (string-append %storedir "/" (basename path))
+ ;; note: we assume here that if path is, for
+ ;; example, /foo/bar/gnu/store/thing.txt and prefix
+ ;; isn't given, then an environment variable has
+ ;; been used to change the store directory to
+ ;; /foo/bar/gnu/store, since otherwise real-path
+ ;; would end up being /gnu/store/thing.txt, which is
+ ;; probably not the right file in this case.
+ path))
+ (real-path (string-append store-dir "/" (basename path))))
+ (let-values (((hash nar-size)
+ (nar-sha256 real-path)))
+ (sqlite-register
+ #:db-file (string-append db-dir "/db.sqlite")
+ #:path to-register
+ #:references references
+ #:deriver deriver
+ #:hash (string-append "sha256:"
+ (bytevector->base16-string hash))
+ #:nar-size nar-size))))
diff --git a/tests/store-database.scm b/tests/store-database.scm
new file mode 100644
index 000000000..1348a75c2
--- /dev/null
+++ b/tests/store-database.scm
@@ -0,0 +1,54 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2017, 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (test-store-database)
+ #:use-module (guix tests)
+ #:use-module ((guix store) #:hide (register-path))
+ #:use-module (guix store database)
+ #:use-module (srfi srfi-26)
+ #:use-module (srfi srfi-64))
+
+;; Test the (guix store database) module.
+
+(define %store
+ (open-connection-for-tests))
+
+\f
+(test-begin "store-database")
+
+(test-assert "register-path"
+ (let ((file (string-append (%store-prefix) "/" (make-string 32 #\f)
+ "-fake")))
+ (when (valid-path? %store file)
+ (delete-paths %store (list file)))
+ (false-if-exception (delete-file file))
+
+ (let ((ref (add-text-to-store %store "ref-of-fake" (random-text)))
+ (drv (string-append file ".drv")))
+ (call-with-output-file file
+ (cut display "This is a fake store item.\n" <>))
+ (register-path file
+ #:references (list ref)
+ #:deriver drv)
+
+ (and (valid-path? %store file)
+ (equal? (references %store file) (list ref))
+ (null? (valid-derivers %store file))
+ (null? (referrers %store file))))))
+
+(test-end "store-database")
--
2.17.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [bug#31618] [PATCH 4/4] Add (guix store deduplication).
2018-05-28 10:36 ` [bug#31618] [PATCH 1/4] build: Check for Guile-SQLite3 Ludovic Courtès
2018-05-28 10:36 ` [bug#31618] [PATCH 2/4] Add (gnu store database) Ludovic Courtès
2018-05-28 10:36 ` [bug#31618] [PATCH 3/4] database: 'register-path' resets timestamps Ludovic Courtès
@ 2018-05-28 10:36 ` Ludovic Courtès
2 siblings, 0 replies; 6+ messages in thread
From: Ludovic Courtès @ 2018-05-28 10:36 UTC (permalink / raw)
To: 31618; +Cc: Caleb Ristvedt
From: Caleb Ristvedt <caleb.ristvedt@cune.org>
* guix/store/database.scm (register-path): Add #:deduplicate? and call
'deduplicate' when it's true.
(counting-wrapper-port, nar-sha256): Move to...
* guix/store/deduplication.scm: ... here. New file.
* tests/store-deduplication.scm: New file.
* Makefile.am (STORE_MODULES): Add deduplication.scm.
(SCM_TESTS) [HAVE_GUILE_SQLITE3]: Add store-deduplication.scm.
Co-authored-by: Ludovic Courtès <ludo@gnu.org>
---
Makefile.am | 6 +-
guix/store/database.scm | 43 ++--------
guix/store/deduplication.scm | 148 ++++++++++++++++++++++++++++++++++
tests/store-deduplication.scm | 64 +++++++++++++++
4 files changed, 222 insertions(+), 39 deletions(-)
create mode 100644 guix/store/deduplication.scm
create mode 100644 tests/store-deduplication.scm
diff --git a/Makefile.am b/Makefile.am
index d81fce558..474575c9f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -259,7 +259,8 @@ endif BUILD_DAEMON_OFFLOAD
# Scheme implementation of the build daemon and related functionality.
STORE_MODULES = \
- guix/store/database.scm
+ guix/store/database.scm \
+ guix/store/deduplication.scm
if HAVE_GUILE_SQLITE3
MODULES += $(STORE_MODULES)
@@ -392,7 +393,8 @@ endif
if HAVE_GUILE_SQLITE3
SCM_TESTS += \
- tests/store-database.scm
+ tests/store-database.scm \
+ tests/store-deduplication.scm
endif
diff --git a/guix/store/database.scm b/guix/store/database.scm
index b9745dbe1..3623c0e7a 100644
--- a/guix/store/database.scm
+++ b/guix/store/database.scm
@@ -21,10 +21,9 @@
#:use-module (sqlite3)
#:use-module (guix config)
#:use-module (guix serialization)
+ #:use-module (guix store deduplication)
#:use-module (guix base16)
- #:use-module (guix hash)
#:use-module (guix build syscalls)
- #:use-module (rnrs io ports)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-19)
#:use-module (ice-9 match)
@@ -140,39 +139,6 @@ bytes of the store item denoted by PATH after being converted to nar form."
;;; High-level interface.
;;;
-;; XXX: Would it be better to just make WRITE-FILE give size as well? I question
-;; the general utility of this approach.
-(define (counting-wrapper-port output-port)
- "Some custom ports don't implement GET-POSITION at all. But if we want to
-figure out how many bytes are being written, we will want to use that. So this
-makes a wrapper around a port which implements GET-POSITION."
- (let ((byte-count 0))
- (make-custom-binary-output-port "counting-wrapper"
- (lambda (bytes offset count)
- (set! byte-count
- (+ byte-count count))
- (put-bytevector output-port bytes
- offset count)
- count)
- (lambda ()
- byte-count)
- #f
- (lambda ()
- (close-port output-port)))))
-
-
-(define (nar-sha256 file)
- "Gives the sha256 hash of a file and the size of the file in nar form."
- (let-values (((port get-hash) (open-sha256-port)))
- (let ((wrapper (counting-wrapper-port port)))
- (write-file file wrapper)
- (force-output wrapper)
- (force-output port)
- (let ((hash (get-hash))
- (size (port-position wrapper)))
- (close-port wrapper)
- (values hash size)))))
-
;; TODO: Factorize with that in (gnu build install).
(define (reset-timestamps file)
"Reset the modification time on FILE and on all the files it contains, if
@@ -211,7 +177,7 @@ it's a directory."
(define* (register-path path
#:key (references '()) deriver prefix
- state-directory)
+ state-directory (deduplicate? #t))
;; Priority for options: first what is given, then environment variables,
;; then defaults. %state-directory, %store-directory, and
;; %store-database-directory already handle the "environment variables /
@@ -262,4 +228,7 @@ be used internally by the daemon's build hook."
#:deriver deriver
#:hash (string-append "sha256:"
(bytevector->base16-string hash))
- #:nar-size nar-size))))
+ #:nar-size nar-size)
+
+ (when deduplicate?
+ (deduplicate real-path hash #:store store-dir)))))
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
new file mode 100644
index 000000000..4b4ac01f6
--- /dev/null
+++ b/guix/store/deduplication.scm
@@ -0,0 +1,148 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
+;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+;;; This houses stuff we do to files when they arrive at the store - resetting
+;;; timestamps, deduplicating, etc.
+
+(define-module (guix store deduplication)
+ #:use-module (guix hash)
+ #:use-module (guix build utils)
+ #:use-module (guix base16)
+ #:use-module (srfi srfi-11)
+ #:use-module (rnrs io ports)
+ #:use-module (ice-9 ftw)
+ #:use-module (guix serialization)
+ #:export (nar-sha256
+ deduplicate))
+
+;; Would it be better to just make WRITE-FILE give size as well? I question
+;; the general utility of this approach.
+(define (counting-wrapper-port output-port)
+ "Some custom ports don't implement GET-POSITION at all. But if we want to
+figure out how many bytes are being written, we will want to use that. So this
+makes a wrapper around a port which implements GET-POSITION."
+ (let ((byte-count 0))
+ (make-custom-binary-output-port "counting-wrapper"
+ (lambda (bytes offset count)
+ (set! byte-count
+ (+ byte-count count))
+ (put-bytevector output-port bytes
+ offset count)
+ count)
+ (lambda ()
+ byte-count)
+ #f
+ (lambda ()
+ (close-port output-port)))))
+
+(define (nar-sha256 file)
+ "Gives the sha256 hash of a file and the size of the file in nar form."
+ (let-values (((port get-hash) (open-sha256-port)))
+ (let ((wrapper (counting-wrapper-port port)))
+ (write-file file wrapper)
+ (force-output wrapper)
+ (force-output port)
+ (let ((hash (get-hash))
+ (size (port-position wrapper)))
+ (close-port wrapper)
+ (values hash size)))))
+
+(define (tempname-in directory)
+ "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
+unused by the time you create anything with that name, but a good shot."
+ (let ((const-part (string-append directory "/.tmp-link-"
+ (number->string (getpid)))))
+ (let try ((guess-part
+ (number->string (random most-positive-fixnum) 16)))
+ (if (file-exists? (string-append const-part "-" guess-part))
+ (try (number->string (random most-positive-fixnum) 16))
+ (string-append const-part "-" guess-part)))))
+
+(define* (get-temp-link target #:optional (link-prefix (dirname target)))
+ "Like mkstemp!, but instead of creating a new file and giving you the name,
+it creates a new hardlink to TARGET and gives you the name. Since
+cross-filesystem hardlinks don't work, the temp link must be created on the
+same filesystem - where in that filesystem it is can be controlled by
+LINK-PREFIX."
+ (let try ((tempname (tempname-in link-prefix)))
+ (catch 'system-error
+ (lambda ()
+ (link target tempname)
+ tempname)
+ (lambda (args)
+ (if (= (system-error-errno args) EEXIST)
+ (try (tempname-in link-prefix))
+ (throw 'system-error args))))))
+
+;; There are 3 main kinds of errors we can get from hardlinking: "Too many
+;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
+;; "can't fit more stuff in this directory" (ENOSPC).
+
+(define (replace-with-link target to-replace)
+ "Atomically replace the file TO-REPLACE with a link to TARGET. Note: TARGET
+and TO-REPLACE must be on the same file system."
+ (let ((temp-link (get-temp-link target (dirname to-replace))))
+ (rename-file temp-link to-replace)))
+
+(define-syntax-rule (false-if-system-error (errors ...) exp ...)
+ "Given ERRORS, a list of system error codes to ignore, evaluates EXP... and
+return #f if any of the system error codes in the given list are thrown."
+ (catch 'system-error
+ (lambda ()
+ exp ...)
+ (lambda args
+ (if (member (system-error-errno args) (list errors ...))
+ #f
+ (apply throw args)))))
+
+(define* (deduplicate path hash #:key (store %store-directory))
+ "Check if a store item with sha256 hash HASH already exists. If so,
+replace PATH with a hardlink to the already-existing one. If not, register
+PATH so that future duplicates can hardlink to it. PATH is assumed to be
+under STORE."
+ (let* ((links-directory (string-append store "/.links"))
+ (link-file (string-append links-directory "/"
+ (bytevector->base16-string hash))))
+ (mkdir-p links-directory)
+ (if (file-is-directory? path)
+ ;; Can't hardlink directories, so hardlink their atoms.
+ (for-each (lambda (file)
+ (unless (member file '("." ".."))
+ (deduplicate file (nar-sha256 file)
+ #:store store)))
+ (scandir path))
+ (if (file-exists? link-file)
+ (false-if-system-error (EMLINK)
+ (replace-with-link link-file path))
+ (catch 'system-error
+ (lambda ()
+ (link path link-file))
+ (lambda args
+ (let ((errno (system-error-errno args)))
+ (cond ((= errno EEXIST)
+ ;; Someone else put an entry for PATH in
+ ;; LINKS-DIRECTORY before we could. Let's use it.
+ (false-if-system-error (EMLINK)
+ (replace-with-link path link-file)))
+ ((= errno ENOSPC)
+ ;; There's not enough room in the directory index for
+ ;; more entries in .links, but that's fine: we can
+ ;; just stop.
+ #f)
+ (else (apply throw args))))))))))
diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm
new file mode 100644
index 000000000..04817a193
--- /dev/null
+++ b/tests/store-deduplication.scm
@@ -0,0 +1,64 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (test-store-deduplication)
+ #:use-module (guix tests)
+ #:use-module (guix store deduplication)
+ #:use-module (guix hash)
+ #:use-module ((guix utils) #:select (call-with-temporary-directory))
+ #:use-module (guix build utils)
+ #:use-module (rnrs bytevectors)
+ #:use-module (ice-9 binary-ports)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-64))
+
+(test-begin "store-deduplication")
+
+(test-equal "deduplicate"
+ (cons* #t #f ;inode comparisons
+ 2 (make-list 5 6)) ;'nlink' values
+
+ (call-with-temporary-directory
+ (lambda (store)
+ (let ((data (string->utf8 "Hello, world!"))
+ (identical (map (lambda (n)
+ (string-append store "/" (number->string n)))
+ (iota 5)))
+ (unique (string-append store "/unique")))
+ (for-each (lambda (file)
+ (call-with-output-file file
+ (lambda (port)
+ (put-bytevector port data))))
+ identical)
+ (call-with-output-file unique
+ (lambda (port)
+ (put-bytevector port (string->utf8 "This is unique."))))
+
+ (for-each (lambda (file)
+ (deduplicate file (sha256 data) #:store store))
+ identical)
+ (deduplicate unique (nar-sha256 unique) #:store store)
+
+ ;; (system (string-append "ls -lRia " store))
+ (cons* (apply = (map (compose stat:ino stat) identical))
+ (= (stat:ino (stat unique))
+ (stat:ino (stat (car identical))))
+ (stat:nlink (stat unique))
+ (map (compose stat:nlink stat) identical))))))
+
+(test-end "store-deduplication")
--
2.17.0
^ permalink raw reply related [flat|nested] 6+ messages in thread