=== modified file 'src/ChangeLog' --- src/ChangeLog 2012-10-25 04:35:39 +0000 +++ src/ChangeLog 2012-10-25 05:29:40 +0000 @@ -1,5 +1,38 @@ 2012-10-25 Paul Eggert + Fix a race condition that causes Emacs to mess up glib (Bug#8855). + The symptom is a diagnostic "GLib-WARNING **: In call to + g_spawn_sync(), exit status of a child process was requested but + SIGCHLD action was set to SIG_IGN and ECHILD was received by + waitpid(), so exit status can't be returned." The diagnostic + is partly wrong, as the SIGCHLD action is not set to SIG_IGN. + The real bug is a race condition between Emacs and glib: Emacs + does a waitpid (-1, ...) and reaps glib's subprocess by mistake, + so that glib can't find it. Work around the bug by invoking + waitpid only on subprocesses that Emacs itself creates. + Perhaps this bug fix can be made more efficient by invoking + waitpid once on a process group instead of invoking it on each + known child, but at least this code fixes the bug. + * process.c (deleted_pid_list, Fdelete_process, create_process) + (wait_reading_process_output, Fsignal_process) + (record_child_status_change, handle_child_signal) + (deliver_child_signal, init_process_emacs, syms_of_process): + * sysdep.c (emacs_sigaction_init, init_signals): + Assume SIGCHLD is defined. This is true on all Emacs porting + targets nowadays, and the code no longer works otherwise. + * process.c (create_process, record_child_status_change): + Don't use special value -1 in pid field, as the caller now must + know the pid rather than having the callee infer it. The + inference was sometimes incorrect anyway, due to another race. + (process_status_retrieved): New function. + (record_child_status_change): Use it. Return bool. + Accept negative 1st argument, which means to wait for one of + the processes that Emacs already knows about. 
+ (handle_child_signal): Let record_child_status_change do all + the work, since we do not want to reap all exited child processes, + only the child processes that Emacs itself created. + * syswait.h: Adjust to above API changes. + Don't assume process IDs fit in int. * emacs.c (shut_down_emacs) [!DOS_NT]: * sysdep.c (sys_suspend) [SIGTSTP && !MSDOS]: === modified file 'src/process.c' --- src/process.c 2012-10-19 19:25:18 +0000 +++ src/process.c 2012-10-25 05:29:40 +0000 @@ -792,14 +792,11 @@ } -#ifdef SIGCHLD /* Fdelete_process promises to immediately forget about the process, but in reality, Emacs needs to remember those processes until they have been - treated by the SIGCHLD handler; otherwise this handler would consider the - process as being synchronous and say that the synchronous process is - dead. */ + treated by the SIGCHLD handler and waitpid has been invoked on them; + otherwise they might fill up the kernel's process table. */ static Lisp_Object deleted_pid_list; -#endif DEFUN ("delete-process", Fdelete_process, Sdelete_process, 1, 1, 0, doc: /* Delete PROCESS: kill it and forget about it immediately. @@ -822,7 +819,6 @@ } else if (p->infd >= 0) { -#ifdef SIGCHLD Lisp_Object symbol; pid_t pid = p->pid; @@ -840,7 +836,6 @@ deleted_pid_list = Fdelete (make_fixnum_or_float (pid), deleted_pid_list); else -#endif { Fkill_process (process, Qnil); /* Do this now, since remove_process will make the @@ -1598,9 +1593,7 @@ #if !defined (WINDOWSNT) && defined (FD_CLOEXEC) int wait_child_setup[2]; #endif -#ifdef SIGCHLD sigset_t blocked; -#endif /* Use volatile to protect variables from being clobbered by vfork. */ volatile int forkin, forkout; volatile int pty_flag = 0; @@ -1704,29 +1697,18 @@ if (inchannel > max_process_desc) max_process_desc = inchannel; - /* Until we store the proper pid, enable the SIGCHLD handler - to recognize an unknown pid as standing for this process. 
- It is very important not to let this `marker' value stay - in the table after this function has returned; if it does - it might cause call-process to hang and subsequent asynchronous - processes to get their return values scrambled. */ - XPROCESS (process)->pid = -1; - - /* This must be called after the above line because it may signal an - error. */ + /* This may signal an error. */ setup_process_coding_systems (process); encoded_current_dir = ENCODE_FILE (current_dir); block_input (); -#ifdef SIGCHLD /* Block SIGCHLD until we have a chance to store the new fork's pid in its process structure. */ sigemptyset (&blocked); sigaddset (&blocked, SIGCHLD); pthread_sigmask (SIG_BLOCK, &blocked, 0); -#endif #if HAVE_WORKING_VFORK /* child_setup must clobber environ on systems with true vfork. @@ -1860,10 +1842,8 @@ /* Emacs ignores SIGPIPE, but the child should not. */ signal (SIGPIPE, SIG_DFL); -#ifdef SIGCHLD /* Stop blocking signals in the child. */ pthread_sigmask (SIG_SETMASK, &empty_mask, 0); -#endif if (pty_flag) child_setup_tty (xforkout); @@ -1888,9 +1868,7 @@ XPROCESS (process)->pid = pid; /* Stop blocking signals in the parent. */ -#ifdef SIGCHLD pthread_sigmask (SIG_SETMASK, &empty_mask, 0); -#endif unblock_input (); if (pid < 0) @@ -4923,11 +4901,7 @@ #endif /* HAVE_PTYS */ /* If we can detect process termination, don't consider the process gone just because its pipe is closed. */ -#ifdef SIGCHLD - else if (nread == 0 && !NETCONN_P (proc) && !SERIALCONN_P (proc)) - ; -#endif - else + else if (nread != 0 || NETCONN_P (proc) || SERIALCONN_P (proc)) { /* Preserve status of processes already terminated. 
*/ XPROCESS (proc)->tick = ++process_tick; @@ -6161,9 +6135,7 @@ #ifdef SIGCONT parse_signal ("cont", SIGCONT); #endif -#ifdef SIGCHLD parse_signal ("chld", SIGCHLD); -#endif #ifdef SIGTTIN parse_signal ("ttin", SIGTTIN); #endif @@ -6279,9 +6251,30 @@ return process; } -/* On receipt of a signal that a child status has changed, loop asking - about children with changed statuses until the system says there - are no more. +/* If the status of the process DESIRED has changed, return true and + set *STATUS to its exit status; otherwise, return false. + If HAVE is nonnegative, assume that HAVE = waitpid (HAVE, STATUS, ...) + has already been invoked, and do not invoke waitpid again. */ + +static bool +process_status_retrieved (pid_t desired, pid_t have, int *status) +{ + if (have < 0) + { + do + have = waitpid (desired, status, WNOHANG | WUNTRACED); + while (have < 0 && errno == EINTR); + } + + return have == desired; +} + +/* If PID is nonnegative, the child process PID with wait status W has + changed its status; record this and return true. + + If PID is negative, ignore W, and look for a known child process + of Emacs whose status has changed. If one is found, record its new + status and return true; otherwise, return false. All we do is change the status; we do not run sentinels or print notifications. That is saved for the next time keyboard input is @@ -6304,11 +6297,9 @@ ** Malloc WARNING: This should never call malloc either directly or indirectly; if it does, that is a bug */ -/* Record the changed status of the child process PID with wait status W. 
*/ -void +bool record_child_status_change (pid_t pid, int w) { -#ifdef SIGCHLD Lisp_Object proc; struct Lisp_Process *p; Lisp_Object tail; @@ -6319,11 +6310,18 @@ for (tail = deleted_pid_list; CONSP (tail); tail = XCDR (tail)) { Lisp_Object xpid = XCAR (tail); - if ((INTEGERP (xpid) && pid == XINT (xpid)) - || (FLOATP (xpid) && pid == XFLOAT_DATA (xpid))) + bool all_pids_are_fixnums + = (MOST_NEGATIVE_FIXNUM <= TYPE_MINIMUM (pid_t) + && TYPE_MAXIMUM (pid_t) <= MOST_POSITIVE_FIXNUM); + pid_t deleted_pid; + if (all_pids_are_fixnums || INTEGERP (xpid)) + deleted_pid = XINT (xpid); + else + deleted_pid = XFLOAT_DATA (xpid); + if (process_status_retrieved (deleted_pid, pid, &w)) { XSETCAR (tail, Qnil); - return; + return 1; } } @@ -6333,23 +6331,11 @@ { proc = XCDR (XCAR (tail)); p = XPROCESS (proc); - if (EQ (p->type, Qreal) && p->pid == pid) + if (EQ (p->type, Qreal) && process_status_retrieved (p->pid, pid, &w)) break; p = 0; } - /* Look for an asynchronous process whose pid hasn't been filled - in yet. */ - if (! p) - for (tail = Vprocess_alist; CONSP (tail); tail = XCDR (tail)) - { - proc = XCDR (XCAR (tail)); - p = XPROCESS (proc); - if (p->pid == -1) - break; - p = 0; - } - /* Change the status of the process that was found. */ if (p) { @@ -6375,11 +6361,14 @@ look around. */ if (input_available_clear_time) *input_available_clear_time = make_emacs_time (0, 0); + + return 1; } - /* There was no asynchronous process found for that pid: we have - a synchronous process. */ - else + else if (0 <= pid) { + /* The caller successfully waited for a pid but no asynchronous + process was found for it, so this is a synchronous process. */ + synch_process_alive = 0; /* Report the status of the synchronous process. */ @@ -6392,11 +6381,13 @@ look around. */ if (input_available_clear_time) *input_available_clear_time = make_emacs_time (0, 0); + + return 1; } -#endif + + return 0; } -#ifdef SIGCHLD /* On some systems, the SIGCHLD handler must return right away. 
If any more processes want to signal us, we will get another signal. @@ -6413,23 +6404,8 @@ static void handle_child_signal (int sig) { - do - { - pid_t pid; - int status; - - do - pid = waitpid (-1, &status, WNOHANG | WUNTRACED); - while (pid < 0 && errno == EINTR); - - /* PID == 0 means no processes found, PID == -1 means a real failure. - Either way, we have done all our job. */ - if (pid <= 0) - break; - - record_child_status_change (pid, status); - } - while (CAN_HANDLE_MULTIPLE_CHILDREN); + while (record_child_status_change (-1, 0) && CAN_HANDLE_MULTIPLE_CHILDREN) + continue; } static void @@ -6438,7 +6414,6 @@ deliver_process_signal (sig, handle_child_signal); } -#endif /* SIGCHLD */ static Lisp_Object @@ -7287,7 +7262,6 @@ inhibit_sentinels = 0; -#ifdef SIGCHLD #ifndef CANNOT_DUMP if (! noninteractive || initialized) #endif @@ -7296,7 +7270,6 @@ emacs_sigaction_init (&action, deliver_child_signal); sigaction (SIGCHLD, &action, 0); } -#endif FD_ZERO (&input_wait_mask); FD_ZERO (&non_keyboard_wait_mask); @@ -7323,9 +7296,7 @@ #endif Vprocess_alist = Qnil; -#ifdef SIGCHLD deleted_pid_list = Qnil; -#endif for (i = 0; i < MAXDESC; i++) { chan_process[i] = Qnil; @@ -7454,9 +7425,7 @@ DEFSYM (Qlast_nonmenu_event, "last-nonmenu-event"); staticpro (&Vprocess_alist); -#ifdef SIGCHLD staticpro (&deleted_pid_list); -#endif #endif /* subprocesses */ === modified file 'src/sysdep.c' --- src/sysdep.c 2012-10-25 04:35:39 +0000 +++ src/sysdep.c 2012-10-25 05:29:40 +0000 @@ -1444,9 +1444,7 @@ /* When handling a signal, block nonfatal system signals that are caught by Emacs. This makes race conditions less likely. 
*/ sigaddset (&action->sa_mask, SIGALRM); -#ifdef SIGCHLD sigaddset (&action->sa_mask, SIGCHLD); -#endif #ifdef SIGDANGER sigaddset (&action->sa_mask, SIGDANGER); #endif @@ -1635,9 +1633,7 @@ # ifdef SIGCLD sys_siglist[SIGCLD] = "Child status changed"; # endif -# ifdef SIGCHLD sys_siglist[SIGCHLD] = "Child status changed"; -# endif # ifdef SIGCONT sys_siglist[SIGCONT] = "Continued"; # endif === modified file 'src/syswait.h' --- src/syswait.h 2012-09-23 22:25:22 +0000 +++ src/syswait.h 2012-10-25 05:29:40 +0000 @@ -23,6 +23,7 @@ #ifndef EMACS_SYSWAIT_H #define EMACS_SYSWAIT_H +#include <stdbool.h> #include <sys/types.h> #ifdef HAVE_SYS_WAIT_H /* We have sys/wait.h with POSIXoid definitions. */ @@ -52,7 +53,7 @@ #endif /* Defined in process.c. */ -extern void record_child_status_change (pid_t, int); +extern bool record_child_status_change (pid_t, int); /* Defined in sysdep.c. */ extern void wait_for_termination (pid_t);