From c7b2ec1c58adf8c795df0a6aaf075dbc331f41e8 Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Thu, 27 May 2021 08:44:44 -0400 Subject: [PATCH 1/2] offload: Parallelize machine check in offload test. * guix/scripts/offload.scm (check-machine-availability): Refactor so that it takes a single machine object. Ensure the cleanup code is always run. (check-machines-availability): New procedure. Call CHECK-MACHINES-AVAILABILITY in parallel, which improves performance (about twice as fast with 4 build machines, from ~30 s to ~15 s). --- guix/scripts/offload.scm | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 835078cb97..b0fd20e158 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -1,7 +1,7 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès ;;; Copyright © 2017 Ricardo Wurmus -;;; Copyright © 2020 Maxim Cournoyer +;;; Copyright © 2020, 2021 Maxim Cournoyer ;;; Copyright © 2020 Julien Lepiller ;;; ;;; This file is part of GNU Guix. @@ -53,6 +53,7 @@ #:use-module (ice-9 regex) #:use-module (ice-9 format) #:use-module (ice-9 binary-ports) + #:use-module (ice-9 threads) #:export (build-machine build-machine? build-machine-name @@ -684,7 +685,7 @@ daemon is not running." (leave (G_ "failed to import '~a' from '~a'~%") item name))))) -(define (check-machine-availability machine-file pred) +(define (check-machines-availability machine-file pred) "Check that each machine matching PRED in MACHINE-FILE is usable as a build machine." (define (build-machine=? m1 m2) @@ -696,18 +697,28 @@ machine." (let ((machines (filter pred (delete-duplicates (build-machines machine-file) build-machine=?)))) - (info (G_ "testing ~a build machines defined in '~a'...~%") + (info (G_ "Testing ~a build machines defined in '~a'...~%") (length machines) machine-file) - (let* ((names (map build-machine-name machines)) - (sockets (map build-machine-daemon-socket machines)) - (sessions (map (cut open-ssh-session <> %short-timeout) machines)) - (nodes (map remote-inferior sessions))) - (for-each assert-node-has-guix nodes names) - (for-each assert-node-repl nodes names) - (for-each assert-node-can-import sessions nodes names sockets) - (for-each assert-node-can-export sessions nodes names sockets) - (for-each close-inferior nodes) - (for-each disconnect! sessions)))) + (par-for-each check-machine-availability machines))) + +(define (check-machine-availability machine) + "Check whether MACHINE is available. Exit with an error upon failure." + ;; Sometimes, the machine remote port may return EOF, presumably because the + ;; connection was lost. Retry up to 3 times. + (let* ((name (build-machine-name machine)) + (socket (build-machine-daemon-socket machine)) + (session (open-ssh-session machine %short-timeout)) + (node (remote-inferior session))) + (dynamic-wind + (lambda () #t) + (lambda () + (assert-node-has-guix node name) + (assert-node-repl node name) + (assert-node-can-import session node name socket) + (assert-node-can-export session node name socket)) + (lambda () + (close-inferior node) + (disconnect! session))))) (define (check-machine-status machine-file pred) "Print the load of each machine matching PRED in MACHINE-FILE." @@ -824,7 +835,7 @@ machine." ((file) (values file (const #t))) (() (values %machine-file (const #t))) (x (leave (G_ "wrong number of arguments~%")))))) - (check-machine-availability (or file %machine-file) pred)))) + (check-machines-availability (or file %machine-file) pred)))) (("status" rest ...) (with-error-handling (let-values (((file pred) -- 2.31.1