unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Tom Gillespie <tgbugs@gmail.com>
To: 56487@debbugs.gnu.org
Cc: Po Lu <luangruo@yahoo.com>
Subject: bug#56487: xgselect race condition leading to abort when USE_GTK not defined
Date: Sun, 10 Jul 2022 19:50:08 -0700	[thread overview]
Message-ID: <CA+G3_PP-xUHs-XtoiV_xvnFVydY7-LQqTSL3BUGj_Vg=iWaCmg@mail.gmail.com> (raw)
In-Reply-To: <871qus1kkt.fsf@yahoo.com>

[-- Attachment #1: Type: text/plain, Size: 73 bytes --]

Here is (I think) the patch with correctly formatted change log entries.

[-- Attachment #2: 0001-xgselect.c-avoid-race-condition-leading-to-abort.patch --]
[-- Type: text/x-patch, Size: 5085 bytes --]

From b3a7c5715b199c9ce935a901d2eb02f238af0769 Mon Sep 17 00:00:00 2001
From: Tom Gillespie <tgbugs@gmail.com>
Date: Tue, 5 Jul 2022 13:43:25 -0700
Subject: [PATCH] xgselect.c: avoid race condition leading to abort

* src/xgselect.c (xg_select): Remove ifdefs that were previously USE_GTK
specific to be unconditional. This prevents a race condition caused by
a call to release_select_lock from triggering when configuring Emacs
with any toolkit other than gtk.
(release_select_lock): Add a branch to check whether
threads_holding_glib_lock has gone negative, and if so, restore to
zero.  In the case where there are multiple threads,
threads_holding_glib_lock could become and stay negative, prevending
g_main_context_release from ever being called, even when it should
have been.

As far as I can tell the way that the thread.c and xgselect.c code was
written was with the intention of avoiding additional locks. This means
that this code is exquisitely senstivie to slight changes in timing. A
comment in thread.c has been added at one location where this happens.

It is entirely possible that the removal of the ifdefs branching on
USE_GTK resolves this issue by slightly changing the timings when
using other/no toolkits so that the abort does not trigger. In all
cases aborts can be induced by adding something like fputs in thread.c
at the location of the new comment.

For the record, the abort behavior is not present in Emacs 27, and was
introduced by 9c62ffb08262c82b7e38e6eb5767f2087424aa47 (the bisect was
quite a pain, so hopefully no one ever needs to do it again).
---
 src/thread.c   |  5 +++++
 src/xgselect.c | 29 ++++++++++++-----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/thread.c b/src/thread.c
index 626d14aad0..a0e85bb75c 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -612,6 +612,11 @@ really_call_select (void *arg)
   sa->result = (sa->func) (sa->max_fds, sa->rfds, sa->wfds, sa->efds,
 			   sa->timeout, sa->sigmask);
 
+  /* The exact timing of this call is critical. If you are debugging
+     and add a call to e.g. fputs before release_select_lock, then
+     the internal call to g_main_context_release can sometimes result
+     in a negative overflow of gmain context->owner_count, triggering
+     an abort. */
   release_select_lock ();
 
   block_interrupt_signal (&oldset);
diff --git a/src/xgselect.c b/src/xgselect.c
index 6e09a15fa8..9a79121617 100644
--- a/src/xgselect.c
+++ b/src/xgselect.c
@@ -39,12 +39,20 @@ Copyright (C) 2009-2022 Free Software Foundation, Inc.
 void
 release_select_lock (void)
 {
+  ptrdiff_t lthgl;
 #if GNUC_PREREQ (4, 7, 0)
-  if (__atomic_sub_fetch (&threads_holding_glib_lock, 1, __ATOMIC_ACQ_REL) == 0)
-    g_main_context_release (glib_main_context);
+  lthgl = __atomic_sub_fetch (&threads_holding_glib_lock, 1, __ATOMIC_ACQ_REL);
+  if (lthgl == 0)
+      g_main_context_release (glib_main_context);
+  else if (lthgl < 0)
+    /* reset to zero in the event that multiple threads cause this to go to -1 */
+    __atomic_fetch_add (&threads_holding_glib_lock, 1, __ATOMIC_ACQ_REL);
 #else
-  if (--threads_holding_glib_lock == 0)
+  lthgl = --threads_holding_glib_lock;
+  if (lthgl == 0)
     g_main_context_release (glib_main_context);
+  else if (lthgl < 0)
+    threads_holding_glib_lock++;
 #endif
 }
 
@@ -116,9 +124,7 @@ xg_select (int fds_lim, fd_set *rfds, fd_set *wfds, fd_set *efds,
   int n_gfds, retval = 0, our_fds = 0, max_fds = fds_lim - 1;
   int i, nfds, tmo_in_millisec, must_free = 0;
   bool need_to_dispatch;
-#ifdef USE_GTK
   bool already_has_events;
-#endif
 
   if (xg_select_suppress_count)
     return pselect (fds_lim, rfds, wfds, efds, timeout, sigmask);
@@ -126,8 +132,8 @@ xg_select (int fds_lim, fd_set *rfds, fd_set *wfds, fd_set *efds,
   context = g_main_context_default ();
   acquire_select_lock (context);
 
-#ifdef USE_GTK
   already_has_events = g_main_context_pending (context);
+#ifdef USE_GTK
 #ifndef HAVE_PGTK
   already_has_events = already_has_events && x_gtk_use_native_input;
 #endif
@@ -179,12 +185,6 @@ xg_select (int fds_lim, fd_set *rfds, fd_set *wfds, fd_set *efds,
 	tmop = &tmo;
     }
 
-#ifndef USE_GTK
-  fds_lim = max_fds + 1;
-  nfds = thread_select (pselect, fds_lim,
-			&all_rfds, have_wfds ? &all_wfds : NULL, efds,
-			tmop, sigmask);
-#else
   /* On PGTK, when you type a key, the key press event are received,
      and one more key press event seems to be received internally.
 
@@ -217,7 +217,6 @@ xg_select (int fds_lim, fd_set *rfds, fd_set *wfds, fd_set *efds,
 	FD_ZERO (efds);
       our_fds++;
     }
-#endif
 
   if (nfds < 0)
     retval = nfds;
@@ -248,11 +247,7 @@ xg_select (int fds_lim, fd_set *rfds, fd_set *wfds, fd_set *efds,
 
   /* If Gtk+ is in use eventually gtk_main_iteration will be called,
      unless retval is zero.  */
-#ifdef USE_GTK
   need_to_dispatch = retval == 0;
-#else
-  need_to_dispatch = true;
-#endif
 
   /* xwidgets make heavy use of GLib subprocesses, which add their own
      SIGCHLD handler at arbitrary locations.  That doesn't play well
-- 
2.35.1


  reply	other threads:[~2022-07-11  2:50 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-10 21:05 bug#56487: xgselect race condition leading to abort when USE_GTK not defined Tom Gillespie
2022-07-11  1:53 ` Po Lu via Bug reports for GNU Emacs, the Swiss army knife of text editors
2022-07-11  2:50   ` Tom Gillespie [this message]
2022-07-11  3:13     ` Po Lu via Bug reports for GNU Emacs, the Swiss army knife of text editors
2022-07-11  3:40       ` Tom Gillespie
2022-07-11 10:16         ` Po Lu via Bug reports for GNU Emacs, the Swiss army knife of text editors
2022-07-11 16:09           ` Tom Gillespie
2022-07-12  2:03             ` Po Lu via Bug reports for GNU Emacs, the Swiss army knife of text editors
2022-07-12  2:20               ` Tom Gillespie
2022-07-12 12:44                 ` Eli Zaretskii
2022-07-15  5:09                   ` Tom Gillespie
2023-09-07 18:30                   ` Stefan Kangas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+G3_PP-xUHs-XtoiV_xvnFVydY7-LQqTSL3BUGj_Vg=iWaCmg@mail.gmail.com' \
    --to=tgbugs@gmail.com \
    --cc=56487@debbugs.gnu.org \
    --cc=luangruo@yahoo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).