From ac87535ebdcc7490d9bbd552c99337917e42a6e8 Mon Sep 17 00:00:00 2001 From: Carlo Zancanaro Date: Wed, 21 Feb 2018 22:57:59 +1100 Subject: [PATCH] shepherd: Poll every 0.5s to find dead forked services * modules/shepherd.scm (open-server-socket): Set socket to be non-blocking. (main): Use select with a timeout, and call check-for-dead-services between connections/timeouts. * modules/shepherd/service.scm (fork+exec-command): Install handle-SIGCHLD as signal handler. (respawn-service): Separate logic for respawning services from handling SIGCHLD. (handle-SIGCHLD, check-for-dead-services): New exported procedures. * tests/basic.sh, tests/status-sexp.sh: Replace constant integers with symbols. * tests/forking-service.sh: New file. * Makefile.am: Add tests/forking-service.sh. --- Makefile.am | 3 +- modules/shepherd.scm | 19 ++++++-- modules/shepherd/service.scm | 78 ++++++++++++++++++------------ tests/basic.sh | 4 +- tests/forking-service.sh | 111 +++++++++++++++++++++++++++++++++++++++++++ tests/status-sexp.sh | 4 +- 6 files changed, 179 insertions(+), 40 deletions(-) create mode 100644 tests/forking-service.sh diff --git a/Makefile.am b/Makefile.am index 1c394e1..5eb058f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -189,7 +189,8 @@ TESTS = \ tests/no-home.sh \ tests/pid-file.sh \ tests/status-sexp.sh \ - tests/signals.sh + tests/signals.sh \ + tests/forking-service.sh TEST_EXTENSIONS = .sh EXTRA_DIST += $(TESTS) diff --git a/modules/shepherd.scm b/modules/shepherd.scm index 650ba63..cdcd328 100644 --- a/modules/shepherd.scm +++ b/modules/shepherd.scm @@ -42,6 +42,8 @@ (with-fluids ((%default-port-encoding "UTF-8")) (let ((sock (socket PF_UNIX SOCK_STREAM 0)) (address (make-socket-address AF_UNIX file-name))) + (fcntl sock F_SETFL (logior O_NONBLOCK + (fcntl sock F_GETFL))) (bind sock address) (listen sock 10) sock))) @@ -218,11 +220,18 @@ (_ #t)) (let next-command () - (match (accept sock) - ((command-source . client-address) - (setvbuf command-source _IOFBF 1024) - (process-connection command-source)) - (_ #f)) + (define (read-from sock) + (match (accept sock) + ((command-source . client-address) + (setvbuf command-source _IOFBF 1024) + (process-connection command-source)) + (_ #f))) + (match (select (list sock) (list) (list) 0.5) + (((sock) _ _) + (read-from sock)) + (_ + #f)) + (check-for-dead-services) (next-command)))))) (define (process-connection sock) diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm index 83600e4..22972c5 100644 --- a/modules/shepherd/service.scm +++ b/modules/shepherd/service.scm @@ -3,6 +3,7 @@ ;; Copyright (C) 2002, 2003 Wolfgang Järling ;; Copyright (C) 2014 Alex Sassmannshausen ;; Copyright (C) 2016 Alex Kost +;; Copyright (C) 2018 Carlo Zancanaro ;; ;; This file is part of the GNU Shepherd. ;; @@ -64,6 +65,7 @@ for-each-service lookup-services respawn-service + handle-SIGCHLD register-services provided-by required-by @@ -77,6 +79,7 @@ make-system-destructor make-init.d-service + check-for-dead-services root-service make-actions @@ -800,7 +803,7 @@ false." its PID." ;; Install the SIGCHLD handler if this is the first fork+exec-command call (unless %sigchld-handler-installed? - (sigaction SIGCHLD respawn-service SA_NOCLDSTOP) + (sigaction SIGCHLD handle-SIGCHLD SA_NOCLDSTOP) (set! %sigchld-handler-installed? #t)) (let ((pid (primitive-fork))) (if (zero? pid) @@ -991,7 +994,7 @@ child left." what (strerror errno)) '(0 . #f))))))) -(define (respawn-service signum) +(define (handle-SIGCHLD signum) "Handle SIGCHLD, possibly by respawning the service that just died, or otherwise by updating its state." (let loop () @@ -1009,39 +1012,42 @@ otherwise by updating its state." ;; SERV can be #f for instance when this code runs just after a ;; service's 'stop' method killed its process and completed. - (when serv - (slot-set! serv 'running #f) - (if (and (respawn? serv) - (not (respawn-limit-hit? (slot-ref serv 'last-respawns) - (car respawn-limit) - (cdr respawn-limit)))) - (if (not (slot-ref serv 'waiting-for-termination?)) - (begin - ;; Everything is okay, start it. - (local-output "Respawning ~a." - (canonical-name serv)) - (slot-set! serv 'last-respawns - (cons (current-time) - (slot-ref serv 'last-respawns))) - (start serv)) - ;; We have just been waiting for the - ;; termination. The `running' slot has already - ;; been set to `#f' by `stop'. - (begin - (local-output "Service ~a terminated." - (canonical-name serv)) - (slot-set! serv 'waiting-for-termination? #f))) - (begin - (local-output "Service ~a has been disabled." - (canonical-name serv)) - (when (respawn? serv) - (local-output " (Respawning too fast.)")) - (slot-set! serv 'enabled? #f)))) + (respawn-service serv) ;; As noted in libc's manual (info "(libc) Process Completion"), ;; loop so we don't miss any terminated child process. (loop)))))) +(define (respawn-service serv) + (when serv + (slot-set! serv 'running #f) + (if (and (respawn? serv) + (not (respawn-limit-hit? (slot-ref serv 'last-respawns) + (car respawn-limit) + (cdr respawn-limit)))) + (if (not (slot-ref serv 'waiting-for-termination?)) + (begin + ;; Everything is okay, start it. + (local-output "Respawning ~a." + (canonical-name serv)) + (slot-set! serv 'last-respawns + (cons (current-time) + (slot-ref serv 'last-respawns))) + (start serv)) + ;; We have just been waiting for the + ;; termination. The `running' slot has already + ;; been set to `#f' by `stop'. + (begin + (local-output "Service ~a terminated." + (canonical-name serv)) + (slot-set! serv 'waiting-for-termination? #f))) + (begin + (local-output "Service ~a has been disabled." + (canonical-name serv)) + (when (respawn? serv) + (local-output " (Respawning too fast.)")) + (slot-set! serv 'enabled? #f))))) + ;; Add NEW-SERVICES to the list of known services. (define (register-services . new-services) (define (register-single-service new) @@ -1171,6 +1177,18 @@ file when persistence is enabled." (lambda (p) (format p "~{~a ~}~%" running-services)))))) +(define (check-for-dead-services) + (define (process-exists? pid) + (catch #t + (lambda () (kill pid 0) #t) + (lambda _ #f))) + (for-each-service (lambda (service) + (let ((running (slot-ref service 'running))) + (when (and (integer? running) + (not (process-exists? running))) + (local-output "PID ~a (~a) is dead!" running (canonical-name service)) + (respawn-service service)))))) + (define root-service (make #:docstring "The root service is used to operate on shepherd itself." diff --git a/tests/basic.sh b/tests/basic.sh index 1ddb334..2ecd8fb 100644 --- a/tests/basic.sh +++ b/tests/basic.sh @@ -150,7 +150,7 @@ cat > "$confdir/some-conf.scm" < #:provides '(test-loaded) - #:start (const 42) + #:start (const 'abc) #:stop (const #f))) EOF @@ -166,7 +166,7 @@ $herd status test-loaded $herd status test-loaded | grep stopped $herd start test-loaded -$herd status test-loaded | grep -i 'running.*42' +$herd status test-loaded | grep -i 'running.*abc' $herd stop test-loaded $herd unload root test-loaded diff --git a/tests/forking-service.sh b/tests/forking-service.sh new file mode 100644 index 0000000..90c684a --- /dev/null +++ b/tests/forking-service.sh @@ -0,0 +1,111 @@ +# GNU Shepherd --- Test detecting a forked process' termination +# Copyright © 2016 Ludovic Courtès +# Copyright © 2018 Carlo Zancanaro +# +# This file is part of the GNU Shepherd. +# +# The GNU Shepherd is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. +# +# The GNU Shepherd is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with the GNU Shepherd. If not, see . + +shepherd --version +herd --version + +socket="t-socket-$$" +conf="t-conf-$$" +log="t-log-$$" +pid="t-pid-$$" +service_pid="t-service-pid-$$" +service2_pid="t-service2-pid-$$" +service2_started="t-service2-starts-$$" + +herd="herd -s $socket" + +function cleanup() { + cat $log || true + rm -f $socket $conf $log $service2_started + test -f $pid && kill "$(cat $pid)" || true + rm -f $pid + test -f $service_pid && kill "$(cat $service_pid)" || true + rm -f $service_pid + test -f $service2_pid && kill "$(cat $service2_pid)" || true + rm -f $service2_pid +} + +trap cleanup EXIT + +cat > "$conf"< $PWD/$service_pid")) + +(register-services + (make + ;; A service that forks into a different process. + #:provides '(test) + #:start (make-forkexec-constructor %command + #:pid-file "$PWD/$service_pid") + #:stop (make-kill-destructor) + #:respawn? #f)) + +(define %command2 + '("$SHELL" "-c" "echo started >> $PWD/$service2_started; sleep 600 & echo \$! > $PWD/$service2_pid")) + +(register-services + (make + ;; A service that forks into a different process. + #:provides '(test2) + #:start (make-forkexec-constructor %command2 + #:pid-file "$PWD/$service2_pid") + #:stop (make-kill-destructor) + #:respawn? #t)) +EOF +cat $conf + +rm -f "$pid" +shepherd -I -s "$socket" -c "$conf" -l "$log" --pid="$pid" & + +# Wait till it's ready. +while ! test -f "$pid" ; do sleep 0.3 ; done + +shepherd_pid="$(cat $pid)" + +# start both of the services +$herd start test +$herd start test2 + +# make sure "test" is started +until $herd status test | grep started; do sleep 0.3; done +test -f "$service_pid" +service_pid_value="$(cat $service_pid)" +# now kill it +kill "$service_pid_value" +while kill -0 "$service_pid_value"; do sleep 0.3; done +# shepherd should notice that the service has stopped within one second +sleep 1 +$herd status test | grep stopped + + + +# make sure "test2" has started +until $herd status test2 | grep started; do sleep 0.3; done +test -f "$service2_pid" +service2_pid_value="$(cat $service2_pid)" +test "$(cat $PWD/$service2_started)" = "started" +# now kill it +rm -f "$service2_pid" +kill $service2_pid_value +while kill -0 "$service2_pid_value"; do sleep 0.3; done +# shepherd should notice that the service has stopped, and restart it, within one second +sleep 1; +$herd status test2 | grep started +test "$(cat $PWD/$service2_started)" = "started +started" diff --git a/tests/status-sexp.sh b/tests/status-sexp.sh index b7c8cb4..11b967e 100644 --- a/tests/status-sexp.sh +++ b/tests/status-sexp.sh @@ -33,7 +33,7 @@ cat > "$conf"< #:provides '(foo) - #:start (const 42) + #:start (const 'abc) #:stop (const #f) #:docstring "Foo!" #:respawn? #t) @@ -85,7 +85,7 @@ root_service_sexp=" (service (version 0) (provides (foo)) (requires ()) (respawn? #t) (docstring \"Foo!\") - (enabled? #t) (running 42) (conflicts ()) + (enabled? #t) (running abc) (conflicts ()) (last-respawns ())) (service (version 0) (provides (bar)) (requires (foo)) -- 2.16.1