unofficial mirror of bug-guix@gnu.org 
 help / color / mirror / code / Atom feed
From: "Ludovic Courtès" <ludo@gnu.org>
To: 55499@debbugs.gnu.org
Cc: "Ludovic Courtès" <ludo@gnu.org>
Subject: bug#55499: [PATCH 2/3] profiles: Do not repeat entries in 'manifest' file.
Date: Tue, 31 May 2022 18:09:15 +0200	[thread overview]
Message-ID: <20220531160916.21508-3-ludo@gnu.org> (raw)
In-Reply-To: <20220531160916.21508-1-ludo@gnu.org>

Fixes <https://issues.guix.gnu.org/55499>.
Reported by Ricardo Wurmus <rekado@elephly.net>.

With this change, the manifest file created for:

  guix install r r-seurat r-cistopic r-monocle3 r-cicero-monocle3 r-assertthat

goes from 5.6M to 192K.  Likewise, on this profile, wall-clock time of:

  GUIX_PROFILING=gc guix package -I

goes from 0.7s to 0.1s, with heap usage going from 55M to 9M.

* guix/profiles.scm (manifest->gexp)[entry->gexp]: Turn into a monadic
procedure.  Return a 'repeated' sexp if ENTRY was already visited
before.
Adjust caller accordingly.  Bump manifest version.
(sexp->manifest)[sexp->manifest-entry]: Turn into a monadic procedure.
Add case for 'repeated' nodes.  Add each entry to the current state
vhash.
Add clause for version 4 manifests.
* tests/profiles.scm ("deduplication of repeated entries"): New test.
* guix/build/profiles.scm (manifest-sexp->inputs+search-paths): Expect
version 4.  Add clause for 'repeated' nodes.
---
 guix/build/profiles.scm |   4 +-
 guix/profiles.scm       | 127 ++++++++++++++++++++++++++++------------
 tests/profiles.scm      |  42 +++++++++++++
 3 files changed, 134 insertions(+), 39 deletions(-)

diff --git a/guix/build/profiles.scm b/guix/build/profiles.scm
index f9875ca92e..c4460f624b 100644
--- a/guix/build/profiles.scm
+++ b/guix/build/profiles.scm
@@ -150,7 +150,7 @@ (define (manifest-sexp->inputs+search-paths manifest)
 values: the list of store items of its manifest entries, and the list of
 search path specifications."
   (match manifest                            ;this must match 'manifest->gexp'
-    (('manifest ('version 3)
+    (('manifest ('version 4)
                 ('packages (entries ...)))
      (let loop ((entries entries)
                 (inputs '())
@@ -162,6 +162,8 @@ (define (manifest-sexp->inputs+search-paths manifest)
           (loop (append rest deps)                ;breadth-first traversal
                 (cons item inputs)
                 (append paths search-paths)))
+         ((('repeated name version item) . rest)
+          (loop rest inputs search-paths))
          (()
           (values (reverse inputs)
                   (delete-duplicates
diff --git a/guix/profiles.scm b/guix/profiles.scm
index bf50c00a1e..44ff37e75b 100644
--- a/guix/profiles.scm
+++ b/guix/profiles.scm
@@ -455,31 +455,53 @@ (define (inferior->entry)
 (define (manifest->gexp manifest)
   "Return a representation of MANIFEST as a gexp."
   (define (entry->gexp entry)
-    (match entry
-      (($ <manifest-entry> name version output (? string? path)
-                           (deps ...) (search-paths ...) _ (properties ...))
-       #~(#$name #$version #$output #$path
-                 (propagated-inputs #$(map entry->gexp deps))
-                 (search-paths #$(map search-path-specification->sexp
-                                      search-paths))
-                 #$@(if (null? properties)
-                        #~()
-                        #~((properties . #$properties)))))
-      (($ <manifest-entry> name version output package
-                           (deps ...) (search-paths ...) _ (properties ...))
-       #~(#$name #$version #$output
-                 (ungexp package (or output "out"))
-                 (propagated-inputs #$(map entry->gexp deps))
-                 (search-paths #$(map search-path-specification->sexp
-                                      search-paths))
-                 #$@(if (null? properties)
-                        #~()
-                        #~((properties . #$properties)))))))
+    ;; Maintain in state monad a vhash of visited entries, indexed by their
+    ;; item, usually package objects (we cannot use the entry itself as an
+    ;; index since identical entries are usually not 'eq?').  Use that vhash
+    ;; to avoid repeating duplicate entries.  This is particularly useful in
+    ;; the presence of propagated inputs, where we could otherwise end up
+    ;; repeating large trees.
+    (mlet %state-monad ((visited (current-state)))
+      (if (match (vhash-assq (manifest-entry-item entry) visited)
+            ((_ . previous-entry)
+             (manifest-entry=? previous-entry entry))
+            (#f #f))
+          (return #~(repeated #$(manifest-entry-name entry)
+                              #$(manifest-entry-version entry)
+                              #$(manifest-entry-item entry)))
+          (mbegin %state-monad
+            (set-current-state (vhash-consq (manifest-entry-item entry)
+                                            entry visited))
+            (mlet %state-monad ((deps (mapm %state-monad entry->gexp
+                                            (manifest-entry-dependencies entry))))
+              (return
+               (match entry
+                 (($ <manifest-entry> name version output (? string? path)
+                                      (_deps ...) (search-paths ...) _ (properties ...))
+                  #~(#$name #$version #$output #$path
+                            (propagated-inputs #$deps)
+                            (search-paths #$(map search-path-specification->sexp
+                                                 search-paths))
+                            #$@(if (null? properties)
+                                   #~()
+                                   #~((properties . #$properties)))))
+                 (($ <manifest-entry> name version output package
+                                      (_deps ...) (search-paths ...) _ (properties ...))
+                  #~(#$name #$version #$output
+                            (ungexp package (or output "out"))
+                            (propagated-inputs #$deps)
+                            (search-paths #$(map search-path-specification->sexp
+                                                 search-paths))
+                            #$@(if (null? properties)
+                                   #~()
+                                   #~((properties . #$properties))))))))))))
 
   (match manifest
     (($ <manifest> (entries ...))
-     #~(manifest (version 3)
-                 (packages #$(map entry->gexp entries))))))
+     #~(manifest (version 4)
+                 (packages #$(run-with-state
+                                 (mapm %state-monad entry->gexp entries)
+                               vlist-null))))))
 
 (define (find-package name version)
   "Return a package from the distro matching NAME and possibly VERSION.  This
@@ -522,25 +544,44 @@ (define (infer-dependency item parent)
 
   (define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
     (match sexp
+      (('repeated name version path)
+       ;; This entry is the same as another one encountered earlier; look it
+       ;; up and return it.
+       (mlet %state-monad ((visited (current-state))
+                           (key -> (list name version path)))
+         (match (vhash-assoc key visited)
+           (#f
+            (raise (formatted-message
+                    (G_ "invalid repeated entry in profile: ~s")
+                    sexp)))
+           ((_ . entry)
+            (return entry)))))
       ((name version output path
              ('propagated-inputs deps)
              ('search-paths search-paths)
              extra-stuff ...)
-       ;; For each of DEPS, keep a promise pointing to ENTRY.
-       (letrec* ((deps* (map (cut sexp->manifest-entry <> (delay entry))
-                             deps))
-                 (entry (manifest-entry
-                          (name name)
-                          (version version)
-                          (output output)
-                          (item path)
-                          (dependencies deps*)
-                          (search-paths (map sexp->search-path-specification
-                                             search-paths))
-                          (parent parent)
-                          (properties (or (assoc-ref extra-stuff 'properties)
-                                          '())))))
-         entry))))
+       (mlet* %state-monad
+           ((entry -> #f)
+            (deps*    (mapm %state-monad
+                            (cut sexp->manifest-entry <> (delay entry))
+                            deps))
+            (visited  (current-state))
+            (key ->   (list name version path)))
+         (set! entry                              ;XXX: emulate 'letrec*'
+               (manifest-entry
+                 (name name)
+                 (version version)
+                 (output output)
+                 (item path)
+                 (dependencies deps*)
+                 (search-paths (map sexp->search-path-specification
+                                    search-paths))
+                 (parent parent)
+                 (properties (or (assoc-ref extra-stuff 'properties)
+                                 '()))))
+         (mbegin %state-monad
+           (set-current-state (vhash-cons key entry visited))
+           (return entry))))))
 
   (match sexp
     (('manifest ('version 0)
@@ -608,7 +649,17 @@ (define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
     ;; Version 3 represents DEPS as full-blown manifest entries.
     (('manifest ('version 3 minor-version ...)
                 ('packages (entries ...)))
-     (manifest (map sexp->manifest-entry entries)))
+     (manifest (run-with-state
+                   (mapm %state-monad sexp->manifest-entry entries)
+                 vlist-null)))
+
+    ;; Version 4 deduplicates repeated entries, as can happen with deep
+    ;; propagated input trees.
+    (('manifest ('version 4 minor-version ...)
+                ('packages (entries ...)))
+     (manifest (run-with-state
+                   (mapm %state-monad sexp->manifest-entry entries)
+                 vlist-null)))
     (_
      (raise (condition
              (&message (message "unsupported manifest format")))))))
diff --git a/tests/profiles.scm b/tests/profiles.scm
index 7e51d37ab9..3838d971c9 100644
--- a/tests/profiles.scm
+++ b/tests/profiles.scm
@@ -586,6 +586,48 @@ (define (entry->sexp entry)
                                                     #:locales? #f)))
         (return #f)))))
 
+(test-assertm "deduplication of repeated entries"
+  ;; Make sure the 'manifest' file does not duplicate identical entries.
+  ;; See <https://issues.guix.gnu.org/55499>.
+  (mlet* %store-monad ((p0 -> (dummy-package "p0"
+                                (build-system trivial-build-system)
+                                (arguments
+                                 `(#:guile ,%bootstrap-guile
+                                   #:builder (mkdir (assoc-ref %outputs "out"))))
+                                (propagated-inputs
+                                 `(("guile" ,%bootstrap-guile)))))
+                       (p1 -> (package
+                                (inherit p0)
+                                (name "p1")))
+                       (drv (profile-derivation (packages->manifest
+                                                 (list p0 p1))
+                                                #:hooks '()
+                                                #:locales? #f)))
+    (mbegin %store-monad
+      (built-derivations (list drv))
+      (let ((file     (string-append (derivation->output-path drv)
+                                     "/manifest"))
+            (manifest (profile-manifest (derivation->output-path drv))))
+        (define (contains-repeated? sexp)
+          (match sexp
+            (('repeated _ ...) #t)
+            ((lst ...) (any contains-repeated? sexp))
+            (_ #f)))
+
+        (return (and (contains-repeated? (call-with-input-file file read))
+
+                     ;; MANIFEST has two entries for %BOOTSTRAP-GUILE since
+                     ;; it's propagated both from P0 and from P1.  When
+                     ;; reading a 'repeated' node, 'read-manifest' should
+                     ;; reuse the previously-read entry so the two
+                     ;; %BOOTSTRAP-GUILE entries must be 'eq?'.
+                     (match (manifest-entries manifest)
+                       (((= manifest-entry-dependencies (dep0))
+                         (= manifest-entry-dependencies (dep1)))
+                        (and (string=? (manifest-entry-name dep0)
+                                       (package-name %bootstrap-guile))
+                             (eq? dep0 dep1))))))))))
+
 (test-assertm "no collision"
   ;; Here we have an entry that is "lowered" (its 'item' field is a store file
   ;; name) and another entry (its 'item' field is a package) that is
-- 
2.36.1





  parent reply	other threads:[~2022-05-31 16:12 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-18 13:53 bug#55499: excessively large manifests due to propagation Ricardo Wurmus
2022-05-24 15:30 ` Ludovic Courtès
2022-05-25  4:35   ` Ricardo Wurmus
2022-05-31 16:09 ` bug#55499: [PATCH 0/3] Make 'manifest' files more compact Ludovic Courtès
2022-05-31 16:09   ` bug#55499: [PATCH 1/3] tests: Augment profile collision test Ludovic Courtès
2022-05-31 16:09   ` Ludovic Courtès [this message]
2022-05-31 17:35     ` bug#55499: [PATCH 2/3] profiles: Do not repeat entries in 'manifest' file Maxime Devos
2022-06-01  9:38       ` Ludovic Courtès
2022-05-31 16:09   ` bug#55499: [PATCH 3/3] squash! profiles: Make all entry fields optional Ludovic Courtès
2022-06-14  7:06   ` bug#55499: excessively large manifests due to propagation Ludovic Courtès
2022-07-01 21:54     ` Ludovic Courtès

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://guix.gnu.org/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220531160916.21508-3-ludo@gnu.org \
    --to=ludo@gnu.org \
    --cc=55499@debbugs.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/guix.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).