From 00f9b0119d3e071f9debbbf1019518f57485b623 Mon Sep 17 00:00:00 2001 From: Maxime Devos Date: Fri, 26 Feb 2021 18:15:56 +0100 Subject: [PATCH 1/4] substitute: implement a 'hook' mechanism for defining substituters A new substituter named X can be defined in (guix scripts substitute X). This substituter will be queried for additional narinfos and will be used for downloading the substitute. Substituters are tried in-order, according to the substitute-methods daemon option. * guix/scripts/substitute.scm (%narinfo-transient-error-ttl, cache-narinfo!): export for use in the HTTP substituter. (make-substituter, substituter?): define a record type for substituters. (default-substituters, resolve-substituter) (%substituters, substituters): automatically determine which substituters to use. (fetch-narinfos): split off HTTP code to guix/scripts/substitute/http.scm and ask each substituter for narinfos. (verify-hash): split off procedure from process-substition. (verifiy-hash/unknown): stub procedure. (process-substitution): split off HTTP code to guix/scripts/substitute.scm and ask each substituter for a substitute. (%fetch-timeout, %random-state, with-timeout) (fetch, narinfo-request, at-most, http-multiple-get) (read-to-eof, narinfo-from-file, %unreachable-hosts) (%max-cached-connections) (open-connection-for-uri/cached, call-with-cached-connection) (call-with-connection-error-handling): move HTTP code to guix/scripts/substitute/http.scm. * guix/scripts/substitute/http.scm (process-substitution/http, fetch-narinfo/narinfo) (http-substituter): define the HTTP substituter. --- Makefile.am | 1 + guix/scripts/substitute.scm | 521 +++++++++++-------------------- guix/scripts/substitute/http.scm | 462 +++++++++++++++++++++++++++ tests/substitute.scm | 154 +++++++++ 4 files changed, 794 insertions(+), 344 deletions(-) create mode 100644 guix/scripts/substitute/http.scm diff --git a/Makefile.am b/Makefile.am index 394d2ef75e..54bee3653c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -279,6 +279,7 @@ MODULES = \ guix/scripts/pull.scm \ guix/scripts/processes.scm \ guix/scripts/substitute.scm \ + guix/scripts/substitute/http.scm \ guix/scripts/authenticate.scm \ guix/scripts/refresh.scm \ guix/scripts/repl.scm \ diff --git a/guix/scripts/substitute.scm b/guix/scripts/substitute.scm index fcb462b47b..78345dce8f 100755 --- a/guix/scripts/substitute.scm +++ b/guix/scripts/substitute.scm @@ -3,6 +3,7 @@ ;;; Copyright © 2014 Nikita Karetnikov ;;; Copyright © 2018 Kyle Meyer ;;; Copyright © 2020 Christopher Baines +;;; Copyright © 2021 Maxime Devos ;;; ;;; This file is part of GNU Guix. ;;; @@ -33,20 +34,18 @@ #:use-module ((guix serialization) #:select (restore-file dump-file)) #:autoload (guix store deduplication) (dump-file/deduplicate) #:autoload (guix scripts discover) (read-substitute-urls) + #:autoload (guix scripts substitute http) (open-connection-for-uri/cached) #:use-module (gcrypt hash) #:use-module (guix base32) #:use-module (guix base64) #:use-module (guix cache) + #:use-module (guix progress) #:use-module (gcrypt pk-crypto) #:use-module (guix pki) - #:use-module ((guix build utils) #:select (mkdir-p dump-port)) + #:use-module ((guix build utils) #:select (mkdir-p)) #:use-module ((guix build download) - #:select (uri-abbreviation nar-uri-abbreviation - (open-connection-for-uri - . guix:open-connection-for-uri) - store-path-abbreviation byte-count->string)) - #:autoload (gnutls) (error/invalid-session) - #:use-module (guix progress) + #:select ((open-connection-for-uri + . guix:open-connection-for-uri))) #:use-module ((guix build syscalls) #:select (set-thread-name)) #:use-module (ice-9 rdelim) @@ -57,26 +56,31 @@ #:use-module (ice-9 binary-ports) #:use-module (ice-9 vlist) #:use-module (rnrs bytevectors) + #:use-module (web uri) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-2) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) - #:use-module (web uri) - #:use-module (web http) - #:use-module (web request) - #:use-module (web response) - #:use-module (guix http-client) #:export (lookup-narinfos lookup-narinfos/diverse %allow-unauthenticated-substitutes? %error-to-file-descriptor-4? + %narinfo-transient-error-ttl + cache-narinfo! + substitute-urls - guix-substitute)) + guix-substitute + + make-substituter substituter? + substituters + default-substituters + resolve-substituter)) ;;; Comment: ;;; @@ -87,8 +91,91 @@ ;;; If possible, substitute a binary for the requested store path, using a Nix ;;; "binary cache". This program implements the Nix "substituter" protocol. ;;; +;;; Currently, only substitutes over HTTP, HTTPS, and from the file system +;;; are supported. At some point in the future, substitutes over IPFS and +;;; GNUnet will be implemented, however. To prepare for this future, +;;; the HTTP-specific code has been split-off to (guix scripts substitute http) +;;; and a ‘hook’ mechanism has been defined. +;;; ;;; Code: + + +;; Defining / using hooks +;; +;; A substituter named NAME must be defined in (guix scripts substitute NAME), +;; bound to a variable NAME-subtituter to be automatically found, +;; presuming it is in the present in the substitute-methods daemon option. + +(define-record-type + (make-substituter name nar-downloader fetch-narinfos recognised-uri-schemes) + substituter? + ;; The name of this substituter as a symbol. + (name substituter-name) + ;; If not #f, this is a procedure that downloads a nar. + ;; + ;; Arguments: (DESTINATION NARINFO IN-KWARG ...). + ;; Result: (PORT KWARG ...). + ;; + ;; PORT can be 'unpacked if the substituter has successfully + ;; unpacked the nar into DESTINATION. It can be #f if the + ;; substituter cannot be used for this narinfo. Alternatively, + ;; it can be an input port from which a nar can be read. + ;; + ;; KWARG ... are keyword arguments. Currently, the following + ;; are recognised: + ;; #:after-input-close (when PORT is a port): thunk to + ;; call after PORT has been closed. The HTTP substituter + ;; uses this to wait for the reporter to finish. + (nar-downloader substituter-nar-downloader) + ;; If not #f, this is a produce that attempts to download narinfos. + ;; This is only called if the URL has an URI scheme + ;; in recognised-uri-schemes. + ;; + ;; Arguments: (URL PATHS). + ;; Result: a list of narinfos. + ;; + ;; Currently, this procedure must handle caching by itself. + (fetch-narinfos substituter-narinfo-fetcher) + ;; A list of the URI schemes supported by this method. + (recognised-uri-schemes substituter-uri-schemes)) + +(define (resolve-substituter name) + "Find the substituter named NAME and return it. +If the substituter doesn't exit, return #f instead." + (and-let* ((module (resolve-module `(guix scripts substitute ,name) + #:ensure #f)) + (variable (module-variable module + (symbol-append name '-substituter))) + (substituter + (and (variable-bound? variable) + (variable-ref variable)))) + (unless (substituter? substituter) + (leave (G_ "'~a-substituter' is not a substituter~%") name)) + substituter)) + +(define (default-substituters) + "Look in the daemon options for which substituters should be used, +and returns these substituters as a list. In case not all requested +substituters could be found, emit a warning for each missing +substituter." + (define (resolve-substituter/warning name) + (or (resolve-substituter name) + (begin + (warning (G_ "~a: unknown substituter~%") name) + #f))) + (filter-map resolve-substituter/warning + (or (and=> (find-daemon-option "substitute-methods") + (compose (cut map string->symbol <>) string-tokenize)) + '(http)))) + +;; The creation of the substituters is delayed +;; to avoid cyclic dependencies. +(define %substituters (delay (make-parameter (default-substituters)))) +(define-syntax substituters (identifier-syntax (force %substituters))) + + + (define %narinfo-cache-directory ;; A local cache of narinfos, to avoid going to the network. Most of the ;; time, 'guix substitute' is called by guix-daemon as root and stores its @@ -130,45 +217,6 @@ disabled!~%")) ;; How often we want to remove files corresponding to expired cache entries. (* 7 24 3600)) -(define %fetch-timeout - ;; Number of seconds after which networking is considered "slow". - 5) - -(define %random-state - (seed->random-state (+ (ash (cdr (gettimeofday)) 32) (getpid)))) - -(define-syntax-rule (with-timeout duration handler body ...) - "Run BODY; when DURATION seconds have expired, call HANDLER, and run BODY -again." - (begin - (sigaction SIGALRM - (lambda (signum) - (sigaction SIGALRM SIG_DFL) - handler)) - (alarm duration) - (call-with-values - (lambda () - (let try () - (catch 'system-error - (lambda () - body ...) - (lambda args - ;; Before Guile v2.0.9-39-gfe51c7b, the SIGALRM triggers EINTR - ;; because of the bug at - ;; . - ;; When that happens, try again. Note: SA_RESTART cannot be - ;; used because of . - (if (= EINTR (system-error-errno args)) - (begin - ;; Wait a little to avoid bursts. - (usleep (random 3000000 %random-state)) - (try)) - (apply throw args)))))) - (lambda result - (alarm 0) - (sigaction SIGALRM SIG_DFL) - (apply values result))))) - (define (narinfo-cache-file cache-url path) "Return the name of the local file that contains an entry for PATH. The entry is stored in a sub-directory specific to CACHE-URL." @@ -240,155 +288,18 @@ indicates that PATH is unavailable at CACHE-URL." narinfo) -(define (narinfo-request cache-url path) - "Return an HTTP request for the narinfo of PATH at CACHE-URL." - (let ((url (string-append cache-url "/" (store-path-hash-part path) - ".narinfo")) - (headers '((User-Agent . "GNU Guile")))) - (build-request (string->uri url) #:method 'GET #:headers headers))) - -(define (at-most max-length lst) - "If LST is shorter than MAX-LENGTH, return it and the empty list; otherwise -return its MAX-LENGTH first elements and its tail." - (let loop ((len 0) - (lst lst) - (result '())) - (match lst - (() - (values (reverse result) '())) - ((head . tail) - (if (>= len max-length) - (values (reverse result) lst) - (loop (+ 1 len) tail (cons head result))))))) - -(define (read-to-eof port) - "Read from PORT until EOF is reached. The data are discarded." - (dump-port port (%make-void-port "w"))) - -(define (narinfo-from-file file url) - "Attempt to read a narinfo from FILE, using URL as the cache URL. Return #f -if file doesn't exist, and the narinfo otherwise." - (catch 'system-error - (lambda () - (call-with-input-file file - (cut read-narinfo <> url))) - (lambda args - (if (= ENOENT (system-error-errno args)) - #f - (apply throw args))))) - -(define %unreachable-hosts - ;; Set of names of unreachable hosts. - (make-hash-table)) - -(define* (call-with-connection-error-handling uri proc) - "Call PROC, and catch if a connection fails, print a warning and return #f." - (define host - (uri-host uri)) - - (catch #t - proc - (match-lambda* - (('getaddrinfo-error error) - (unless (hash-ref %unreachable-hosts host) - (hash-set! %unreachable-hosts host #t) ;warn only once - (warning (G_ "~a: host not found: ~a~%") - host (gai-strerror error))) - #f) - (('system-error . args) - (unless (hash-ref %unreachable-hosts host) - (hash-set! %unreachable-hosts host #t) - (warning (G_ "~a: connection failed: ~a~%") host - (strerror - (system-error-errno `(system-error ,@args))))) - #f) - (args - (apply throw args))))) - (define* (fetch-narinfos url paths #:key (open-connection guix:open-connection-for-uri)) "Retrieve all the narinfos for PATHS from the cache at URL and return them." - (define update-progress! - (let ((done 0) - (total (length paths))) - (lambda () - (display "\r\x1b[K" (current-error-port)) ;erase current line - (force-output (current-error-port)) - (format (current-error-port) - (G_ "updating substitutes from '~a'... ~5,1f%") - url (* 100. (/ done total))) - (set! done (+ 1 done))))) - - (define hash-part->path - (let ((mapping (fold (lambda (path result) - (vhash-cons (store-path-hash-part path) path - result)) - vlist-null - paths))) - (lambda (hash) - (match (vhash-assoc hash mapping) - (#f #f) - ((_ . path) path))))) - - (define (handle-narinfo-response request response port result) - (let* ((code (response-code response)) - (len (response-content-length response)) - (cache (response-cache-control response)) - (ttl (and cache (assoc-ref cache 'max-age)))) - (update-progress!) - - ;; Make sure to read no more than LEN bytes since subsequent bytes may - ;; belong to the next response. - (if (= code 200) ; hit - (let ((narinfo (read-narinfo port url #:size len))) - (if (string=? (dirname (narinfo-path narinfo)) - (%store-prefix)) - (begin - (cache-narinfo! url (narinfo-path narinfo) narinfo ttl) - (cons narinfo result)) - result)) - (let* ((path (uri-path (request-uri request))) - (hash-part (basename - (string-drop-right path 8)))) ;drop ".narinfo" - (if len - (get-bytevector-n port len) - (read-to-eof port)) - (cache-narinfo! url (hash-part->path hash-part) #f - (if (or (= 404 code) (= 202 code)) - ttl - %narinfo-transient-error-ttl)) - result)))) - - (define (do-fetch uri) - (case (and=> uri uri-scheme) - ((http https) - ;; Note: Do not check HTTPS server certificates to avoid depending - ;; on the X.509 PKI. We can do it because we authenticate - ;; narinfos, which provides a much stronger guarantee. - (let* ((requests (map (cut narinfo-request url <>) paths)) - (result (begin - (update-progress!) - (call-with-connection-error-handling - uri - (lambda () - (http-multiple-get uri - handle-narinfo-response '() - requests - #:open-connection open-connection - #:verify-certificate? #f)))))) - (newline (current-error-port)) - result)) - ((file #f) - (let* ((base (string-append (uri-path uri) "/")) - (files (map (compose (cut string-append base <> ".narinfo") - store-path-hash-part) - paths))) - (filter-map (cut narinfo-from-file <> url) files))) - (else - (leave (G_ "~s: unsupported server URI scheme~%") - (if uri (uri-scheme uri) url))))) - - (do-fetch (string->uri url))) + (define scheme (uri-scheme (string->uri url))) + (define usable-substituter? + (compose (cute memq scheme <>) substituter-uri-schemes)) + (define fetch-narinfo/substituter + (compose (cute <> url paths #:open-connection open-connection) + substituter-narinfo-fetcher)) + (define found + (map fetch-narinfo/substituter (filter usable-substituter? (substituters)))) + (concatenate found)) (define* (lookup-narinfos cache paths #:key (open-connection guix:open-connection-for-uri)) @@ -606,55 +517,29 @@ authorized substitutes." (wtf (error "unknown `--query' command" wtf)))) -(define %max-cached-connections - ;; Maximum number of connections kept in cache by - ;; 'open-connection-for-uri/cached'. - 16) - -(define open-connection-for-uri/cached - (let ((cache '())) - (lambda* (uri #:key fresh? (timeout %fetch-timeout) verify-certificate?) - "Return a connection for URI, possibly reusing a cached connection. -When FRESH? is true, delete any cached connections for URI and open a new one. -Return #f if URI's scheme is 'file' or #f. - -When true, TIMEOUT is the maximum number of milliseconds to wait for -connection establishment. When VERIFY-CERTIFICATE? is true, verify HTTPS -server certificates." - (define host (uri-host uri)) - (define scheme (uri-scheme uri)) - (define key (list host scheme (uri-port uri))) - - (and (not (memq scheme '(file #f))) - (match (assoc-ref cache key) - (#f - ;; Open a new connection to URI and evict old entries from - ;; CACHE, if any. - (let-values (((socket) - (guix:open-connection-for-uri - uri - #:verify-certificate? verify-certificate? - #:timeout timeout)) - ((new-cache evicted) - (at-most (- %max-cached-connections 1) cache))) - (for-each (match-lambda - ((_ . port) - (false-if-exception (close-port port)))) - evicted) - (set! cache (alist-cons key socket new-cache)) - socket)) - (socket - (if (or fresh? (port-closed? socket)) - (begin - (false-if-exception (close-port socket)) - (set! cache (alist-delete key cache)) - (open-connection-for-uri/cached uri #:timeout timeout - #:verify-certificate? - verify-certificate?)) - (begin - ;; Drain input left from the previous use. - (drain-input socket) - socket)))))))) +(define (verify-hash actual expected algorithm narinfo) + "Check whether we got the data announced in the narinfo NARINFO. +ACTUAL is the actual hash, and EXPECTED is the hash according +to the narinfo." + (if (bytevector=? actual expected) + ;; Tell the daemon that we're done. + (format (current-output-port) "success ~a ~a~%" + (narinfo-hash narinfo) (narinfo-size narinfo)) + ;; The actual data has a different hash than that in NARINFO. + (format (current-output-port) "hash-mismatch ~a ~a ~a~%" + (hash-algorithm-name algorithm) + (bytevector->nix-base32-string expected) + (bytevector->nix-base32-string actual)))) + +(define (verify-hash/unknown . rest) + ;; Variant of verify-hash where the hash hasn't yet been computed. + ;; TODO: this will be implemented later in the patch series! + ;; (To be used by the IPFS and GNUnet substituter) + (leave (G_ "TODO verify-hash/unknown is unimplemented~%"))) + +(define-syntax-rule (receive* kwargs exp exp* exp** ...) + (call-with-values (lambda () exp) + (lambda* kwargs exp* exp** ...))) (define* (process-substitution store-item destination #:key cache-urls acl @@ -680,107 +565,54 @@ the current output port." (apply dump-file/deduplicate (append args (list #:store (%store-prefix))))) - (define (fetch uri) - (case (uri-scheme uri) - ((file) - (let ((port (open-file (uri-path uri) "r0b"))) - (values port (stat:size (stat port))))) - ((http https) - (guard (c ((http-get-error? c) - (leave (G_ "download from '~a' failed: ~a, ~s~%") - (uri->string (http-get-error-uri c)) - (http-get-error-code c) - (http-get-error-reason c)))) - ;; Test this with: - ;; sudo tc qdisc add dev eth0 root netem delay 1500ms - ;; and then cancel with: - ;; sudo tc qdisc del dev eth0 root - (with-timeout %fetch-timeout - (begin - (warning (G_ "while fetching ~a: server is somewhat slow~%") - (uri->string uri)) - (warning (G_ "try `--no-substitutes' if the problem persists~%"))) - (call-with-connection-error-handling - uri - (lambda () - (http-fetch uri #:text? #f - #:open-connection open-connection-for-uri/cached - #:keep-alive? #t - #:buffered? #f - #:verify-certificate? #f)))))) - (else - (leave (G_ "unsupported substitute URI scheme: ~a~%") - (uri->string uri))))) - (unless narinfo (leave (G_ "no valid substitute for '~a'~%") store-item)) - (let-values (((uri compression file-size) - (narinfo-best-uri narinfo))) - (unless print-build-trace? - (format (current-error-port) - (G_ "Downloading ~a...~%") (uri->string uri))) - - (let*-values (((raw download-size) - ;; 'guix publish' without '--cache' doesn't specify a - ;; Content-Length, so DOWNLOAD-SIZE is #f in this case. - (fetch uri)) - ((progress) - (let* ((dl-size (or download-size - (and (equal? compression "none") - (narinfo-size narinfo)))) - (reporter (if print-build-trace? - (progress-reporter/trace - destination - (uri->string uri) dl-size - (current-error-port)) - (progress-reporter/file - (uri->string uri) dl-size - (current-error-port) - #:abbreviation nar-uri-abbreviation)))) - ;; Keep RAW open upon completion so we can later reuse - ;; the underlying connection. - (progress-report-port reporter raw #:close? #f))) - ((input pids) - ;; NOTE: This 'progress' port of current process will be - ;; closed here, while the child process doing the - ;; reporting will close it upon exit. - (decompressed-port (string->symbol compression) - progress)) - - ;; Compute the actual nar hash as we read it. - ((algorithm expected) - (narinfo-hash-algorithm+value narinfo)) - ((hashed get-hash) - (open-hash-input-port algorithm input))) - ;; Unpack the Nar at INPUT into DESTINATION. - (restore-file hashed destination - #:dump-file (if (and destination-in-store? - deduplicate?) - dump-file/deduplicate* - dump-file)) - (close-port hashed) - (close-port input) - - ;; Wait for the reporter to finish. - (every (compose zero? cdr waitpid) pids) - - ;; Skip a line after what 'progress-reporter/file' printed, and another - ;; one to visually separate substitutions. - (display "\n\n" (current-error-port)) - - ;; Check whether we got the data announced in NARINFO. - (let ((actual (get-hash))) - (if (bytevector=? actual expected) - ;; Tell the daemon that we're done. - (format (current-output-port) "success ~a ~a~%" - (narinfo-hash narinfo) (narinfo-size narinfo)) - ;; The actual data has a different hash than that in NARINFO. - (format (current-output-port) "hash-mismatch ~a ~a ~a~%" - (hash-algorithm-name algorithm) - (bytevector->nix-base32-string expected) - (bytevector->nix-base32-string actual))))))) + ;; Try each hook in-order, until a hook is successful. + (let loop ((hooks (filter substituter-nar-downloader (substituters)))) + (unless (pair? hooks) + (leave (G_ "no substituter for 'a'~%") + store-item)) + (receive* (input #:key after-input-close) + ((substituter-nar-downloader (car hooks)) destination narinfo + #:print-build-trace? print-build-trace?) + (cond ((not input) + (format (current-error-port) + (G_ "Substituter '~a' not applicable for '~a'.~%") + (substituter-name (car hooks)) + store-item) + ;; This hook was unusable, try the next hook. + (loop (cdr hooks))) + ((input-port? input) + ;; Compute the actual nar hash as we read it. + (let*-values (((algorithm expected) + (narinfo-hash-algorithm+value narinfo)) + ((hashed get-hash) + (open-hash-input-port algorithm input))) + ;; Unpack the Nar at INPUT into DESTINATION. + (restore-file hashed destination + #:dump-file (if (and destination-in-store? + deduplicate?) + dump-file/deduplicate* + dump-file)) + (close-port hashed) + (close-port input) + (when after-input-close + (after-input-close)) + ;; Check whether we got the data announced in NARINFO. + (verify-hash (get-hash) expected algorithm narinfo))) + ((eq? input 'unpacked) + (when after-input-close + (after-input-close)) + ;; Check whether we got the data announced in the NARINFO. + (verify-hash/unknown destination narinfo)) + (else + (format (current-error-port) "~s~%" input) + (leave + (G_ "Substituter '~a' did not produce usable output for '~a'.") + (substituter-name (car hooks)) + store-item)))))) ;;; @@ -992,6 +824,7 @@ if needed, as expected by the daemon's agent." (leave (G_ "~a: unrecognized options~%") opts))))))) ;;; Local Variables: +;;; eval: (put 'receive* 'scheme-indent-function 2) ;;; eval: (put 'with-timeout 'scheme-indent-function 1) ;;; eval: (put 'with-redirected-error-port 'scheme-indent-function 0) ;;; End: diff --git a/guix/scripts/substitute/http.scm b/guix/scripts/substitute/http.scm new file mode 100644 index 0000000000..d5dd1e4a8e --- /dev/null +++ b/guix/scripts/substitute/http.scm @@ -0,0 +1,462 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2014 Nikita Karetnikov +;;; Copyright © 2018 Kyle Meyer +;;; Copyright © 2020 Christopher Baines +;;; Copyright © 2021 Maxime Devos +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see string)) + #:use-module (guix progress) + #:use-module (guix store) + #:use-module (guix scripts substitute) + #:autoload (gnutls) (error/invalid-session) + #:use-module (ice-9 match) + #:use-module (ice-9 binary-ports) + #:use-module (ice-9 vlist) + #:use-module (ice-9 format)) + +(define %fetch-timeout + ;; Number of seconds after which networking is considered "slow". + 5) + +(define %random-state + (seed->random-state (+ (ash (cdr (gettimeofday)) 32) (getpid)))) + +(define-syntax-rule (with-timeout duration handler body ...) + "Run BODY; when DURATION seconds have expired, call HANDLER, and run BODY +again." + (begin + (sigaction SIGALRM + (lambda (signum) + (sigaction SIGALRM SIG_DFL) + handler)) + (alarm duration) + (call-with-values + (lambda () + (let try () + (catch 'system-error + (lambda () + body ...) + (lambda args + ;; Before Guile v2.0.9-39-gfe51c7b, the SIGALRM triggers EINTR + ;; because of the bug at + ;; . + ;; When that happens, try again. Note: SA_RESTART cannot be + ;; used because of . + (if (= EINTR (system-error-errno args)) + (begin + ;; Wait a little to avoid bursts. + (usleep (random 3000000 %random-state)) + (try)) + (apply throw args)))))) + (lambda result + (alarm 0) + (sigaction SIGALRM SIG_DFL) + (apply values result))))) + +(define* (fetch uri #:key (buffered? #t) (timeout? #t) + (keep-alive? #f) (port #f)) + "Return a binary input port to URI and the number of bytes it's expected to +provide. + +When PORT is true, use it as the underlying I/O port for HTTP transfers; when +PORT is false, open a new connection for URI. When KEEP-ALIVE? is true, the +connection (typically PORT) is kept open once data has been fetched from URI." + (case (uri-scheme uri) + ((file) + (let ((port (open-file (uri-path uri) + (if buffered? "rb" "r0b")))) + (values port (stat:size (stat port))))) + ((http https) + (guard (c ((http-get-error? c) + (leave (G_ "download from '~a' failed: ~a, ~s~%") + (uri->string (http-get-error-uri c)) + (http-get-error-code c) + (http-get-error-reason c)))) + ;; Test this with: + ;; sudo tc qdisc add dev eth0 root netem delay 1500ms + ;; and then cancel with: + ;; sudo tc qdisc del dev eth0 root + (let ((port port)) + (with-timeout (if timeout? + %fetch-timeout + 0) + (begin + (warning (G_ "while fetching ~a: server is somewhat slow~%") + (uri->string uri)) + (warning (G_ "try `--no-substitutes' if the problem persists~%"))) + (call-with-connection-error-handling + uri + (lambda () + (http-fetch uri #:text? #f + #:open-connection open-connection-for-uri/cached + #:keep-alive? #t + #:buffered? #f + #:verify-certificate? #f))))))) + (else + (leave (G_ "unsupported substitute URI scheme: ~a~%") + (uri->string uri))))) + +(define (narinfo-request cache-url path) + "Return an HTTP request for the narinfo of PATH at CACHE-URL." + (let ((url (string-append cache-url "/" (store-path-hash-part path) + ".narinfo")) + (headers '((User-Agent . "GNU Guile")))) + (build-request (string->uri url) #:method 'GET #:headers headers))) + +(define (at-most max-length lst) + "If LST is shorter than MAX-LENGTH, return it and the empty list; otherwise +return its MAX-LENGTH first elements and its tail." + (let loop ((len 0) + (lst lst) + (result '())) + (match lst + (() + (values (reverse result) '())) + ((head . tail) + (if (>= len max-length) + (values (reverse result) lst) + (loop (+ 1 len) tail (cons head result))))))) + +(define* (http-multiple-get base-uri proc seed requests + #:key port (verify-certificate? #t) + (open-connection guix:open-connection-for-uri) + (keep-alive? #t) + (batch-size 1000)) + "Send all of REQUESTS to the server at BASE-URI. Call PROC for each +response, passing it the request object, the response, a port from which to +read the response body, and the previous result, starting with SEED, à la +'fold'. Return the final result. + +When PORT is specified, use it as the initial connection on which HTTP +requests are sent; otherwise call OPEN-CONNECTION to open a new connection for +a URI. When KEEP-ALIVE? is false, close the connection port before +returning." + (let connect ((port port) + (requests requests) + (result seed)) + (define batch + (at-most batch-size requests)) + + ;; (format (current-error-port) "connecting (~a requests left)..." + ;; (length requests)) + (let ((p (or port (open-connection base-uri + #:verify-certificate? + verify-certificate?)))) + ;; For HTTPS, P is not a file port and does not support 'setvbuf'. + (when (file-port? p) + (setvbuf p 'block (expt 2 16))) + + ;; Send BATCH in a row. + ;; XXX: Do our own caching to work around inefficiencies when + ;; communicating over TLS: . + (let-values (((buffer get) (open-bytevector-output-port))) + ;; Inherit the HTTP proxying property from P. + (set-http-proxy-port?! buffer (http-proxy-port? p)) + + (for-each (cut write-request <> buffer) + batch) + (put-bytevector p (get)) + (force-output p)) + + ;; Now start processing responses. + (let loop ((sent batch) + (processed 0) + (result result)) + (match sent + (() + (match (drop requests processed) + (() + (unless keep-alive? + (close-port p)) + (reverse result)) + (remainder + (connect p remainder result)))) + ((head tail ...) + (let* ((resp (read-response p)) + (body (response-body-port resp)) + (result (proc head resp body result))) + ;; The server can choose to stop responding at any time, in which + ;; case we have to try again. Check whether that is the case. + ;; Note that even upon "Connection: close", we can read from BODY. + (match (assq 'connection (response-headers resp)) + (('connection 'close) + (close-port p) + (connect #f ;try again + (drop requests (+ 1 processed)) + result)) + (_ + (loop tail (+ 1 processed) result)))))))))) ;keep going + +(define (read-to-eof port) + "Read from PORT until EOF is reached. The data are discarded." + (dump-port port (%make-void-port "w"))) + +(define (narinfo-from-file file url) + "Attempt to read a narinfo from FILE, using URL as the cache URL. Return #f +if file doesn't exist, and the narinfo otherwise." + (catch 'system-error + (lambda () + (call-with-input-file file + (cut read-narinfo <> url))) + (lambda args + (if (= ENOENT (system-error-errno args)) + #f + (apply throw args))))) + +(define %unreachable-hosts + ;; Set of names of unreachable hosts. + (make-hash-table)) + +(define %max-cached-connections + ;; Maximum number of connections kept in cache by + ;; 'open-connection-for-uri/cached'. + 16) + +(define open-connection-for-uri/cached + (let ((cache '())) + (lambda* (uri #:key fresh? (timeout %fetch-timeout) verify-certificate?) + "Return a connection for URI, possibly reusing a cached connection. +When FRESH? is true, delete any cached connections for URI and open a new one. +Return #f if URI's scheme is 'file' or #f. + +When true, TIMEOUT is the maximum number of milliseconds to wait for +connection establishment. When VERIFY-CERTIFICATE? is true, verify HTTPS +server certificates." + (define host (uri-host uri)) + (define scheme (uri-scheme uri)) + (define key (list host scheme (uri-port uri))) + + (and (not (memq scheme '(file #f))) + (match (assoc-ref cache key) + (#f + ;; Open a new connection to URI and evict old entries from + ;; CACHE, if any. + (let-values (((socket) + (guix:open-connection-for-uri + uri + #:verify-certificate? verify-certificate? + #:timeout timeout)) + ((new-cache evicted) + (at-most (- %max-cached-connections 1) cache))) + (for-each (match-lambda + ((_ . port) + (false-if-exception (close-port port)))) + evicted) + (set! cache (alist-cons key socket new-cache)) + socket)) + (socket + (if (or fresh? (port-closed? socket)) + (begin + (false-if-exception (close-port socket)) + (set! cache (alist-delete key cache)) + (open-connection-for-uri/cached uri #:timeout timeout + #:verify-certificate? + verify-certificate?)) + (begin + ;; Drain input left from the previous use. + (drain-input socket) + socket)))))))) + +(define* (process-substitution/http destination narinfo #:key print-build-trace?) + (let-values (((uri compression file-size) + (narinfo-best-uri narinfo))) + (unless print-build-trace? + (format (current-error-port) + (G_ "Downloading ~a...~%") (uri->string uri))) + + (let*-values (((raw download-size) + ;; 'guix publish' without '--cache' doesn't specify a + ;; Content-Length, so DOWNLOAD-SIZE is #f in this case. + (fetch uri)) + ((progress) + (let* ((dl-size (or download-size + (and (equal? compression "none") + (narinfo-size narinfo)))) + (reporter (if print-build-trace? + (progress-reporter/trace + destination + (uri->string uri) dl-size + (current-error-port)) + (progress-reporter/file + (uri->string uri) dl-size + (current-error-port) + #:abbreviation nar-uri-abbreviation)))) + ;; Keep RAW open upon completion so we can later reuse + ;; the underlying connection. + (progress-report-port reporter raw #:close? #f))) + ((input pids) + ;; NOTE: This 'progress' port of current process will be + ;; closed here, while the child process doing the + ;; reporting will close it upon exit. + (decompressed-port (string->symbol compression) + progress))) + (values input + #:after-input-close + (lambda () + ;; Wait for the reporter to finish. + (every (compose zero? cdr waitpid) pids) + + ;; Skip a line after what 'progress-reporter/file' printed, + ;; and another one to visually separate substitutions. + (display "\n\n" (current-error-port))))))) + +(define* (call-with-connection-error-handling uri proc) + "Call PROC, and catch if a connection fails, print a warning and return #f." + (define host + (uri-host uri)) + + (catch #t + proc + (match-lambda* + (('getaddrinfo-error error) + (unless (hash-ref %unreachable-hosts host) + (hash-set! %unreachable-hosts host #t) ;warn only once + (warning (G_ "~a: host not found: ~a~%") + host (gai-strerror error))) + #f) + (('system-error . args) + (unless (hash-ref %unreachable-hosts host) + (hash-set! %unreachable-hosts host #t) + (warning (G_ "~a: connection failed: ~a~%") host + (strerror + (system-error-errno `(system-error ,@args))))) + #f) + (args + (apply throw args))))) + +(define* (fetch-narinfos/http url paths + #:key (open-connection + guix:open-connection-for-uri)) + "Retrieve all the narinfos for PATHS from the cache at URL and return them. +The URI scheme of URL must currently be http, https or file." + (define update-progress! + (let ((done 0) + (total (length paths))) + (lambda () + (display "\r\x1b[K" (current-error-port)) ;erase current line + (force-output (current-error-port)) + (format (current-error-port) + (G_ "updating substitutes from '~a'... ~5,1f%") + url (* 100. (/ done total))) + (set! done (+ 1 done))))) + + (define hash-part->path + (let ((mapping (fold (lambda (path result) + (vhash-cons (store-path-hash-part path) path + result)) + vlist-null + paths))) + (lambda (hash) + (match (vhash-assoc hash mapping) + (#f #f) + ((_ . path) path))))) + + (define (handle-narinfo-response request response port result) + (let* ((code (response-code response)) + (len (response-content-length response)) + (cache (response-cache-control response)) + (ttl (and cache (assoc-ref cache 'max-age)))) + (update-progress!) + + ;; Make sure to read no more than LEN bytes since subsequent bytes may + ;; belong to the next response. + (if (= code 200) ; hit + (let ((narinfo (read-narinfo port url #:size len))) + (if (string=? (dirname (narinfo-path narinfo)) + (%store-prefix)) + (begin + (cache-narinfo! url (narinfo-path narinfo) narinfo ttl) + (cons narinfo result)) + result)) + (let* ((path (uri-path (request-uri request))) + (hash-part (basename + (string-drop-right path 8)))) ;drop ".narinfo" + (if len + (get-bytevector-n port len) + (read-to-eof port)) + (cache-narinfo! url (hash-part->path hash-part) #f + (if (or (= 404 code) (= 202 code)) + ttl + %narinfo-transient-error-ttl)) + result)))) + + (define (do-fetch uri) + (case (and=> uri uri-scheme) + ((http https) + ;; Note: Do not check HTTPS server certificates to avoid depending + ;; on the X.509 PKI. We can do it because we authenticate + ;; narinfos, which provides a much stronger guarantee. + (let* ((requests (map (cut narinfo-request url <>) paths)) + (result (begin + (update-progress!) + (call-with-connection-error-handling + uri + (lambda () + (http-multiple-get uri + handle-narinfo-response '() + requests + #:open-connection open-connection + #:verify-certificate? #f)))))) + (newline (current-error-port)) + result)) + ((file #f) + (let* ((base (string-append (uri-path uri) "/")) + (files (map (compose (cut string-append base <> ".narinfo") + store-path-hash-part) + paths))) + (filter-map (cut narinfo-from-file <> url) files))) + (else + (leave (G_ "~s: unsupported server URI scheme~%") + (if uri (uri-scheme uri) url))))) + + (do-fetch (string->uri url))) + +(define http-substituter + (make-substituter 'http + process-substitution/http fetch-narinfos/http + '(http https file))) diff --git a/tests/substitute.scm b/tests/substitute.scm index 697abc4684..6c754f774d 100644 --- a/tests/substitute.scm +++ b/tests/substitute.scm @@ -1,6 +1,7 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2014 Nikita Karetnikov ;;; Copyright © 2014, 2015, 2017, 2018, 2019 Ludovic Courtès +;;; Copyright © 2021 Maxime Devos ;;; ;;; This file is part of GNU Guix. ;;; @@ -38,7 +39,9 @@ #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) #:use-module (web uri) + #:use-module (ice-9 receive) #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) #:use-module (srfi srfi-34) @@ -585,6 +588,157 @@ System: mips64el-linux\n"))) (lambda () (false-if-exception (delete-file "substitute-retrieved"))))))) + + +;; Test substituter hooks. + +(test-quit "no substituters, no substitutes" + "no valid substitute" + ;; This substitute should be ignored + (with-narinfo (string-append %narinfo "Signature: " + (signature-field %narinfo)) + (dynamic-wind + (const #t) + (lambda () + (parameterize ((substituters '())) + (request-substitution + (string-append (%store-prefix) "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo") + "substitute-retrieved"))) + (lambda () + (false-if-exception (delete-file "substitute-retrieved")))))) + +(define (call-with-output-bytevector proc) + (receive (port get-bytevector) + (open-bytevector-output-port) + (proc port) + (get-bytevector))) + +(define (string->nar-port string) + (define narinfo-directory %main-substitute-directory) + ;; Prepare the nar. + (call-with-output-file + (string-append narinfo-directory "/example.out") + (cut display string <>)) + (open-bytevector-input-port + (call-with-output-bytevector + (cut write-file (string-append narinfo-directory "/example.out") <>)))) + +;; Make a substituter looking up narinfos +;; and nars in an association list of +;; (URL PATH) -> narinfo +;; NARINFO-CONTENTS -> string +(define (alist->substituter narinfos nars) + (define (fetch-narinfos url paths . rest) + (define (fetch-narinfo path) + (filter-map (lambda (entry) + (and (equal? (car entry) (cons url path)) + (string->narinfo (cdr entry) url))) + narinfos)) + (concatenate (map fetch-narinfo paths))) + (define (nar-downloader destination narinfo . rest) + (string->nar-port + (assoc-ref nars (narinfo-contents narinfo)))) + (make-substituter 'test nar-downloader fetch-narinfos '(test))) + +(test-equal "query, fetch-narinfos is used" + (string-append (%store-prefix) "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo") + (string-trim-both + (with-output-to-string + (lambda () + (define item + (string-append (%store-prefix) "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo")) + (define substituter + (alist->substituter `((("test://x/y" . ,item) . ,%narinfo)) + '())) + (parameterize ((substituters (list substituter)) + (substitute-urls '("test://x/y")) + (%allow-unauthenticated-substitutes? #t)) + (with-input-from-string (string-append "have " item) + (lambda () + (guix-substitute "--query")))))))) + +;; XXX why does this result in "hash-mismatch sha256 ... ..."? +(test-equal "substitute, nar-downloader is used" + "Substitutable data." + (let* ((url "test://x/y") + (item (string-append (%store-prefix) "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo")) + (narinfo-alist `(((,url . ,item) . ,%narinfo))) + (nar-alist `((,%narinfo . "Substitutable data."))) + (substituter (alist->substituter narinfo-alist nar-alist))) + (parameterize ((substituters + (list (alist->substituter narinfo-alist nar-alist))) + (substitute-urls '("test://x/y")) + (%allow-unauthenticated-substitutes? #t)) + (dynamic-wind + (const #t) + (lambda () + (request-substitution (string-append (%store-prefix) + "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo") + "substitute-retrieved") + (call-with-input-file "substitute-retrieved" get-string-all)) + (lambda () + (false-if-exception (delete-file "substitute-retrieved"))))))) + +(test-equal "substitute, only supported URI schemes are passed to fetch-narinfos" + "Substitutable data." + (let () + (define (substituter-expecting protocol) + (make-substituter + (symbol-append 'test/ protocol) + (lambda (destination narinfo . rest) + (and (eq? protocol (uri-scheme (string->uri + (narinfo-uri-base narinfo)))) + (string->nar-port "Substitutable data."))) + (lambda (url paths . rest) + (unless (eq? protocol (uri-scheme (string->uri url))) + (error "what? I can't use that.")) + (list (string->narinfo %narinfo url))) + (list protocol))) + (parameterize ((substituters + (list (substituter-expecting 'x) + (substituter-expecting 'y))) + (substitute-urls '("x:///" "y:///")) + (%allow-unauthenticated-substitutes? #t)) + (dynamic-wind + (const #t) + (lambda () + (request-substitution (string-append (%store-prefix) + "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-foo") + "substitute-retrieved") + (call-with-input-file "substitute-retrieved" get-string-all)) + (lambda () + (false-if-exception (delete-file "substitute-retrieved"))))))) + +;; Define a few substituters for testing purposes. +(define-module (guix scripts substitute test-x) + #:export (test-x-substituter) + #:use-module (guix scripts substitute)) + +(define test-x-substituter (make-substituter 'x #f #f '(x))) + +(define-module (guix scripts substitute test-y) + #:export (test-y-substituter) + #:use-module (guix scripts substitute)) + +(define test-y-substituter (make-substituter 'y #f #f '(x))) + +;; And we're back! +(define-module (test-substitute)) + +(test-equal "substitute, substituters are found in-order" + (list (@ (guix scripts substitute http) http-substituter) + (@ (guix scripts substitute test-x) test-x-substituter) + (@ (guix scripts substitute test-y) test-y-substituter)) + (let ((old-options #f)) + (dynamic-wind + (lambda () + (set! old-options (getenv "_NIX_OPTIONS")) + (setenv "_NIX_OPTIONS" "substitute-methods=http test-x test-y")) + (lambda () + ((@@ (guix scripts substitute) default-substituters))) + (lambda () + (setenv "_NIX_OPTIONS" old-options))))) + (test-end "substitute") ;;; Local Variables: -- 2.30.0