;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès ;;; Copyright © 2014 Nikita Karetnikov ;;; Copyright © 2018 Kyle Meyer ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (guix substitutes) #:use-module (guix i18n) #:use-module (guix cache) #:use-module (guix store) #:use-module (guix utils) #:use-module (guix base32) #:use-module (guix config) #:use-module (guix narinfo) #:use-module (guix combinators) #:use-module (guix diagnostics) #:use-module ((guix build utils) #:select (mkdir-p dump-port)) #:use-module ((guix build download) #:select ((open-connection-for-uri . guix:open-connection-for-uri))) #:use-module (gcrypt hash) #:use-module (ice-9 match) #:use-module (ice-9 vlist) #:use-module (ice-9 format) #:use-module (ice-9 binary-ports) #:use-module (rnrs bytevectors) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-19) #:use-module (srfi srfi-26) #:use-module (web uri) #:use-module (web http) #:use-module (web request) #:use-module (web response) #:export (%narinfo-cache-directory with-cached-connection lookup-narinfos lookup-narinfos/diverse)) (define %narinfo-cache-directory ;; A local cache of narinfos, to avoid going to the network. Most of the ;; time, 'guix substitute' is called by guix-daemon as root and stores its ;; cached data in /var/guix/…. However, when invoked from 'guix challenge' ;; as a user, it stores its cache in ~/.cache. (if (zero? (getuid)) (or (and=> (getenv "XDG_CACHE_HOME") (cut string-append <> "/guix/substitute")) (string-append %state-directory "/substitute/cache")) (string-append (cache-directory #:ensure? #f) "/substitute"))) (define %narinfo-ttl ;; Number of seconds during which cached narinfo lookups are considered ;; valid for substitute servers that do not advertise a TTL via the ;; 'Cache-Control' response header. (* 36 3600)) (define %narinfo-negative-ttl ;; Likewise, but for negative lookups---i.e., cached lookup failures (404). (* 1 3600)) (define %narinfo-transient-error-ttl ;; Likewise, but for transient errors such as 504 ("Gateway timeout"). (* 10 60)) (define %fetch-timeout ;; Number of seconds after which networking is considered "slow". 5) (define (narinfo-cache-file cache-url path) "Return the name of the local file that contains an entry for PATH. The entry is stored in a sub-directory specific to CACHE-URL." ;; The daemon does not sanitize its input, so PATH could be something like ;; "/gnu/store/foo". Gracefully handle that. (match (store-path-hash-part path) (#f (leave (G_ "'~a' does not name a store item~%") path)) ((? string? hash-part) (string-append %narinfo-cache-directory "/" (bytevector->base32-string (sha256 (string->utf8 cache-url))) "/" hash-part)))) (define (cache-narinfo! cache-url path narinfo ttl) "Cache locally NARNIFO for PATH, which originates from CACHE-URL, with the given TTL (a number of seconds or #f). NARINFO may be #f, in which case it indicates that PATH is unavailable at CACHE-URL." (define now (current-time time-monotonic)) (define (cache-entry cache-uri narinfo) `(narinfo (version 2) (cache-uri ,cache-uri) (date ,(time-second now)) (ttl ,(or ttl (if narinfo %narinfo-ttl %narinfo-negative-ttl))) (value ,(and=> narinfo narinfo->string)))) (let ((file (narinfo-cache-file cache-url path))) (mkdir-p (dirname file)) (with-atomic-file-output file (lambda (out) (write (cache-entry cache-url narinfo) out)))) narinfo) (define %max-cached-connections ;; Maximum number of connections kept in cache by ;; 'open-connection-for-uri/cached'. 16) (define open-connection-for-uri/cached (let ((cache '())) (lambda* (uri #:key fresh? timeout verify-certificate?) "Return a connection for URI, possibly reusing a cached connection. When FRESH? is true, delete any cached connections for URI and open a new one. Return #f if URI's scheme is 'file' or #f. When true, TIMEOUT is the maximum number of milliseconds to wait for connection establishment. When VERIFY-CERTIFICATE? is true, verify HTTPS server certificates." (define host (uri-host uri)) (define scheme (uri-scheme uri)) (define key (list host scheme (uri-port uri))) (and (not (memq scheme '(file #f))) (match (assoc-ref cache key) (#f ;; Open a new connection to URI and evict old entries from ;; CACHE, if any. (let-values (((socket) (guix:open-connection-for-uri uri #:verify-certificate? verify-certificate? #:timeout timeout)) ((new-cache evicted) (at-most (- %max-cached-connections 1) cache))) (for-each (match-lambda ((_ . port) (false-if-exception (close-port port)))) evicted) (set! cache (alist-cons key socket new-cache)) socket)) (socket (if (or fresh? (port-closed? socket)) (begin (false-if-exception (close-port socket)) (set! cache (alist-delete key cache)) (open-connection-for-uri/cached uri #:timeout timeout #:verify-certificate? verify-certificate?)) (begin ;; Drain input left from the previous use. (drain-input socket) socket)))))))) (define* (call-with-cached-connection uri proc #:optional (open-connection open-connection-for-uri/cached)) (let ((port (open-connection uri))) (catch #t (lambda () (proc port)) (lambda (key . args) ;; If PORT was cached and the server closed the connection in the ;; meantime, we get EPIPE. In that case, open a fresh connection and ;; retry. We might also get 'bad-response or a similar exception from ;; (web response) later on, once we've sent the request. (if (or (and (eq? key 'system-error) (= EPIPE (system-error-errno `(,key ,@args)))) (memq key '(bad-response bad-header bad-header-component))) (proc (open-connection uri #:fresh? #t)) (apply throw key args)))))) (define-syntax-rule (with-cached-connection uri port exp ...) "Bind PORT with EXP... to a socket connected to URI." (call-with-cached-connection uri (lambda (port) exp ...))) (define (at-most max-length lst) "If LST is shorter than MAX-LENGTH, return it and the empty list; otherwise return its MAX-LENGTH first elements and its tail." (let loop ((len 0) (lst lst) (result '())) (match lst (() (values (reverse result) '())) ((head . tail) (if (>= len max-length) (values (reverse result) lst) (loop (+ 1 len) tail (cons head result))))))) (define* (http-multiple-get base-uri proc seed requests #:key port (verify-certificate? #t) (open-connection guix:open-connection-for-uri) (keep-alive? #t) (batch-size 1000)) "Send all of REQUESTS to the server at BASE-URI. Call PROC for each response, passing it the request object, the response, a port from which to read the response body, and the previous result, starting with SEED, à la 'fold'. Return the final result. When PORT is specified, use it as the initial connection on which HTTP requests are sent; otherwise call OPEN-CONNECTION to open a new connection for a URI. When KEEP-ALIVE? is false, close the connection port before returning." (let connect ((port port) (requests requests) (result seed)) (define batch (at-most batch-size requests)) ;; (format (current-error-port) "connecting (~a requests left)..." ;; (length requests)) (let ((p (or port (open-connection base-uri #:verify-certificate? verify-certificate?)))) ;; For HTTPS, P is not a file port and does not support 'setvbuf'. (when (file-port? p) (setvbuf p 'block (expt 2 16))) ;; Send BATCH in a row. ;; XXX: Do our own caching to work around inefficiencies when ;; communicating over TLS: . (let-values (((buffer get) (open-bytevector-output-port))) ;; Inherit the HTTP proxying property from P. (set-http-proxy-port?! buffer (http-proxy-port? p)) (for-each (cut write-request <> buffer) batch) (put-bytevector p (get)) (force-output p)) ;; Now start processing responses. (let loop ((sent batch) (processed 0) (result result)) (match sent (() (match (drop requests processed) (() (unless keep-alive? (close-port p)) (reverse result)) (remainder (connect p remainder result)))) ((head tail ...) (let* ((resp (read-response p)) (body (response-body-port resp)) (result (proc head resp body result))) ;; The server can choose to stop responding at any time, in which ;; case we have to try again. Check whether that is the case. ;; Note that even upon "Connection: close", we can read from BODY. (match (assq 'connection (response-headers resp)) (('connection 'close) (close-port p) (connect #f ;try again (drop requests (+ 1 processed)) result)) (_ (loop tail (+ 1 processed) result)))))))))) ;keep going (define %unreachable-hosts ;; Set of names of unreachable hosts. (make-hash-table)) (define* (open-connection-for-uri/maybe uri #:key fresh? (time %fetch-timeout)) "Open a connection to URI via 'open-connection-for-uri/cached' and return a port to it, or, if connection failed, print a warning and return #f. Pass #:fresh? to 'open-connection-for-uri/cached'." (define host (uri-host uri)) (catch #t (lambda () (open-connection-for-uri/cached uri #:timeout time #:fresh? fresh?)) (match-lambda* (('getaddrinfo-error error) (unless (hash-ref %unreachable-hosts host) (hash-set! %unreachable-hosts host #t) ;warn only once (warning (G_ "~a: host not found: ~a~%") host (gai-strerror error))) #f) (('system-error . args) (unless (hash-ref %unreachable-hosts host) (hash-set! %unreachable-hosts host #t) (warning (G_ "~a: connection failed: ~a~%") host (strerror (system-error-errno `(system-error ,@args))))) #f) (args (apply throw args))))) (define (read-to-eof port) "Read from PORT until EOF is reached. The data are discarded." (dump-port port (%make-void-port "w"))) (define (narinfo-request cache-url path) "Return an HTTP request for the narinfo of PATH at CACHE-URL." (let ((url (string-append cache-url "/" (store-path-hash-part path) ".narinfo")) (headers '((User-Agent . "GNU Guile")))) (build-request (string->uri url) #:method 'GET #:headers headers))) (define (narinfo-from-file file url) "Attempt to read a narinfo from FILE, using URL as the cache URL. Return #f if file doesn't exist, and the narinfo otherwise." (catch 'system-error (lambda () (call-with-input-file file (cut read-narinfo <> url))) (lambda args (if (= ENOENT (system-error-errno args)) #f (apply throw args))))) (define (fetch-narinfos url paths) "Retrieve all the narinfos for PATHS from the cache at URL and return them." (define update-progress! (let ((done 0) (total (length paths))) (lambda () (display "\r\x1b[K" (current-error-port)) ;erase current line (force-output (current-error-port)) (format (current-error-port) (G_ "updating substitutes from '~a'... ~5,1f%") url (* 100. (/ done total))) (set! done (+ 1 done))))) (define hash-part->path (let ((mapping (fold (lambda (path result) (vhash-cons (store-path-hash-part path) path result)) vlist-null paths))) (lambda (hash) (match (vhash-assoc hash mapping) (#f #f) ((_ . path) path))))) (define (handle-narinfo-response request response port result) (let* ((code (response-code response)) (len (response-content-length response)) (cache (response-cache-control response)) (ttl (and cache (assoc-ref cache 'max-age)))) (update-progress!) ;; Make sure to read no more than LEN bytes since subsequent bytes may ;; belong to the next response. (if (= code 200) ; hit (let ((narinfo (read-narinfo port url #:size len))) (if (string=? (dirname (narinfo-path narinfo)) (%store-prefix)) (begin (cache-narinfo! url (narinfo-path narinfo) narinfo ttl) (cons narinfo result)) result)) (let* ((path (uri-path (request-uri request))) (hash-part (basename (string-drop-right path 8)))) ;drop ".narinfo" (if len (get-bytevector-n port len) (read-to-eof port)) (cache-narinfo! url (hash-part->path hash-part) #f (if (or (= 404 code) (= 202 code)) ttl %narinfo-transient-error-ttl)) result)))) (define (do-fetch uri) (case (and=> uri uri-scheme) ((http https) ;; Note: Do not check HTTPS server certificates to avoid depending ;; on the X.509 PKI. We can do it because we authenticate ;; narinfos, which provides a much stronger guarantee. (let* ((requests (map (cut narinfo-request url <>) paths)) (result (call-with-cached-connection uri (lambda (port) (if port (begin (update-progress!) (http-multiple-get uri handle-narinfo-response '() requests #:open-connection open-connection-for-uri/cached #:verify-certificate? #f #:port port)) '())) open-connection-for-uri/maybe))) (newline (current-error-port)) result)) ((file #f) (let* ((base (string-append (uri-path uri) "/")) (files (map (compose (cut string-append base <> ".narinfo") store-path-hash-part) paths))) (filter-map (cut narinfo-from-file <> url) files))) (else (leave (G_ "~s: unsupported server URI scheme~%") (if uri (uri-scheme uri) url))))) (do-fetch (string->uri url))) (define (cached-narinfo cache-url path) "Check locally if we have valid info about PATH coming from CACHE-URL. Return two values: a Boolean indicating whether we have valid cached info, and that info, which may be either #f (when PATH is unavailable) or the narinfo for PATH." (define now (current-time time-monotonic)) (define cache-file (narinfo-cache-file cache-url path)) (catch 'system-error (lambda () (call-with-input-file cache-file (lambda (p) (match (read p) (('narinfo ('version 2) ('cache-uri cache-uri) ('date date) ('ttl ttl) ('value #f)) ;; A cached negative lookup. (if (obsolete? date now ttl) (values #f #f) (values #t #f))) (('narinfo ('version 2) ('cache-uri cache-uri) ('date date) ('ttl ttl) ('value value)) ;; A cached positive lookup (if (obsolete? date now ttl) (values #f #f) (values #t (string->narinfo value cache-uri)))) (('narinfo ('version v) _ ...) (values #f #f)))))) (lambda _ (values #f #f)))) (define (lookup-narinfos cache paths) "Return the narinfos for PATHS, invoking the server at CACHE when no information is available locally." (let-values (((cached missing) (fold2 (lambda (path cached missing) (let-values (((valid? value) (cached-narinfo cache path))) (if valid? (if value (values (cons value cached) missing) (values cached missing)) (values cached (cons path missing))))) '() '() paths))) (if (null? missing) cached (let ((missing (fetch-narinfos cache missing))) (append cached (or missing '())))))) (define (lookup-narinfos/diverse caches paths authorized?) "Look up narinfos for PATHS on all of CACHES, a list of URLS, in that order. That is, when a cache lacks an AUTHORIZED? narinfo, look it up in the next cache, and so on. Return a list of narinfos for PATHS or a subset thereof. The returned narinfos are either AUTHORIZED?, or they claim a hash that matches an AUTHORIZED? narinfo." (define (select-hit result) (lambda (path) (match (vhash-fold* cons '() path result) ((one) one) ((several ..1) (let ((authorized (find authorized? (reverse several)))) (and authorized (find (cut equivalent-narinfo? <> authorized) several))))))) (let loop ((caches caches) (paths paths) (result vlist-null) ;path->narinfo vhash (hits '())) ;paths (match paths (() ;we're done ;; Now iterate on all the HITS, and return exactly one match for each ;; hit: the first narinfo that is authorized, or that has the same hash ;; as an authorized narinfo, in the order of CACHES. (filter-map (select-hit result) hits)) (_ (match caches ((cache rest ...) (let* ((narinfos (lookup-narinfos cache paths)) (definite (map narinfo-path (filter authorized? narinfos))) (missing (lset-difference string=? paths definite))) ;XXX: perf (loop rest missing (fold vhash-cons result (map narinfo-path narinfos) narinfos) (append definite hits)))) (() ;that's it (filter-map (select-hit result) hits))))))) ;;; Local Variables: ;;; eval: (put 'with-timeout 'scheme-indent-function 1) ;;; eval: (put 'with-cached-connection 'scheme-indent-function 2) ;;; eval: (put 'call-with-cached-connection 'scheme-indent-function 1) ;;; End: ;;; substitutes.scm ends here