diff --git a/gnu/build/linux-container.scm b/gnu/build/linux-container.scm index bdeca2cdb9..308c0bb325 100644 --- a/gnu/build/linux-container.scm +++ b/gnu/build/linux-container.scm @@ -1,6 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 David Thompson -;;; Copyright © 2017, 2018, 2019 Ludovic Courtès +;;; Copyright © 2017, 2018, 2019, 2022 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -21,6 +21,7 @@ (define-module (gnu build linux-container) #:use-module (ice-9 format) #:use-module (ice-9 match) #:use-module (ice-9 rdelim) + #:use-module (srfi srfi-1) #:use-module (srfi srfi-98) #:use-module (guix build utils) #:use-module (guix build syscalls) @@ -33,7 +34,8 @@ (define-module (gnu build linux-container) run-container call-with-container container-excursion - container-excursion*)) + container-excursion* + self-sever)) (define (user-namespace-supported?) "Return #t if user namespaces are supported on this system." @@ -174,50 +176,53 @@ (define* (mount* source target type #:optional (flags 0) options (chmod "/" #o755))) (define* (initialize-user-namespace pid host-uids - #:key (guest-uid 0) (guest-gid 0)) + #:key (guest-uid 0) (guest-gid 0) + (uid (getuid)) (gid (getgid))) "Configure the user namespace for PID. HOST-UIDS specifies the number of host user identifiers to map into the user namespace. GUEST-UID and GUEST-GID specify the first UID (respectively GID) that host UIDs (respectively GIDs) map to in the namespace." (define proc-dir - (string-append "/proc/" (number->string pid))) + (string-append "/proc/" + (match pid + ('self "self") + (_ (number->string pid))))) (define (scope file) (string-append proc-dir file)) - (let ((uid (getuid)) - (gid (getgid))) - - ;; Only root can write to the gid map without first disabling the - ;; setgroups syscall. - (unless (and (zero? uid) (zero? gid)) - (call-with-output-file (scope "/setgroups") - (lambda (port) - (display "deny" port)))) - - ;; Map the user/group that created the container to the root user - ;; within the container. - (call-with-output-file (scope "/uid_map") + ;; Only root can write to the gid map without first disabling the + ;; setgroups syscall. + (unless (and (zero? uid) (zero? gid)) + (call-with-output-file (scope "/setgroups") (lambda (port) - (format port "~d ~d ~d" guest-uid uid host-uids))) - (call-with-output-file (scope "/gid_map") - (lambda (port) - (format port "~d ~d ~d" guest-gid gid host-uids))))) + (display "deny" port)))) + + ;; Map the user/group that created the container to the root user + ;; within the container. + (call-with-output-file (scope "/uid_map") + (lambda (port) + (format port "~d ~d ~d" guest-uid uid host-uids))) + (call-with-output-file (scope "/gid_map") + (lambda (port) + (format port "~d ~d ~d" guest-gid gid host-uids)))) (define (namespaces->bit-mask namespaces) "Return the number suitable for the 'flags' argument of 'clone' that corresponds to the symbols in NAMESPACES." ;; Use the same flags as fork(3) in addition to the namespace flags. - (apply logior SIGCHLD - (map (match-lambda - ('cgroup CLONE_NEWCGROUP) - ('mnt CLONE_NEWNS) - ('uts CLONE_NEWUTS) - ('ipc CLONE_NEWIPC) - ('user CLONE_NEWUSER) - ('pid CLONE_NEWPID) - ('net CLONE_NEWNET)) - namespaces))) + (fold (lambda (namespace flags) + (logior flags + (match namespace + ('cgroup CLONE_NEWCGROUP) + ('mnt CLONE_NEWNS) + ('uts CLONE_NEWUTS) + ('ipc CLONE_NEWIPC) + ('user CLONE_NEWUSER) + ('pid CLONE_NEWPID) + ('net CLONE_NEWNET)))) + 0 + namespaces)) (define* (run-container root mounts namespaces host-uids thunk #:key (guest-uid 0) (guest-gid 0)) @@ -236,7 +241,7 @@ (define* (run-container root mounts namespaces host-uids thunk (match (socketpair PF_UNIX SOCK_STREAM 0) ((child . parent) (let ((flags (namespaces->bit-mask namespaces))) - (match (clone flags) + (match (clone (logior SIGCHLD flags)) (0 (call-with-clean-exit (lambda () @@ -392,3 +397,23 @@ (define (container-excursion* pid thunk) (close-port out) (close-port in) #f))))) + +(define* (self-sever mounts + #:key (namespaces %namespaces) (host-uids 1) + (guest-uid 0) (guest-gid 0)) + (let ((uid (getuid)) + (gid (getgid))) + (unshare (namespaces->bit-mask namespaces)) + + (initialize-user-namespace 'self host-uids + #:uid uid #:gid gid + #:guest-uid uid + #:guest-gid guest-gid) + + (when (memq 'mnt namespaces) + ;; (mount "none" "/" #f (logior MS_REC MS_PRIVATE)) + (call-with-temporary-directory + (lambda (root) + (mount-file-systems root mounts + #:mount-/proc? (memq 'pid namespaces) + #:mount-/sys? (memq 'net namespaces))))))) diff --git a/guix/build/syscalls.scm b/guix/build/syscalls.scm index a7401fd73f..5ee6bd1229 100644 --- a/guix/build/syscalls.scm +++ b/guix/build/syscalls.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès +;;; Copyright © 2014-2022 Ludovic Courtès ;;; Copyright © 2015 David Thompson ;;; Copyright © 2015 Mark H Weaver ;;; Copyright © 2017 Mathieu Othacehe @@ -49,6 +49,11 @@ (define-module (guix build syscalls) MS_RELATIME MS_BIND MS_MOVE + MS_REC + MS_SILENT + MS_POSIXACL + MS_UNBINDABLE + MS_PRIVATE MS_LAZYTIME MNT_FORCE MNT_DETACH @@ -140,6 +145,7 @@ (define-module (guix build syscalls) CLONE_NEWPID CLONE_NEWNET clone + unshare setns PF_PACKET @@ -537,6 +543,11 @@ (define MS_REMOUNT 32) (define MS_NOATIME 1024) (define MS_BIND 4096) (define MS_MOVE 8192) +(define MS_REC 16384) +(define MS_SILENT 32768) +(define MS_POSIXACL 65536) +(define MS_UNBINDABLE 131072) +(define MS_PRIVATE 262144) (define MS_RELATIME 2097152) (define MS_STRICTATIME 16777216) (define MS_LAZYTIME 33554432) @@ -1101,6 +1112,23 @@ (define clone (list err)) ret))))) +(define unshare + (let ((proc (syscall->procedure int "unshare" (list int)))) + (lambda (flags) + "Disassociate the current process from parts of its execution context +according to FLAGS, which must be a logical or of CLONE_NEW* constants. + +Note that CLONE_NEWUSER requires that the calling process be single-threaded, +which is possible if and only if libgc is running a single marker thread; this +can be achieved by setting the GC_MARKERS environment variable to 1. If the +calling process is multi-threaded, this throws to 'system-error' with EINVAL." + (let-values (((ret err) + (without-automatic-finalization (proc flags)))) + (unless (zero? ret) + (throw 'system-error "unshare" "~a: ~A" + (list flags (strerror err)) + err)))))) + (define setns ;; Some systems may be using an old (pre-2.14) version of glibc where there ;; is no 'setns' function available.