unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/3] ds fix and cleanups
@ 2023-10-04  8:50 Eric Wong
  2023-10-04  8:50 ` [PATCH 1/3] ds: Reset: replace Poller object early Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2023-10-04  8:50 UTC (permalink / raw)
  To: meta

1/3 fixes a bug I noticed in syslog during a deploy.
The rest hopefully make things more robust and less
prone to errors caused by typos.

Eric Wong (3):
  ds: Reset: replace Poller object early
  ds: cleanup fork + Reset support
  ds: make %AWAIT_PIDS a hash, not hashref

 lib/PublicInbox/DS.pm  | 39 +++++++++++++++++++++------------------
 lib/PublicInbox/LEI.pm |  2 +-
 2 files changed, 22 insertions(+), 19 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] ds: Reset: replace Poller object early
  2023-10-04  8:50 [PATCH 0/3] ds fix and cleanups Eric Wong
@ 2023-10-04  8:50 ` Eric Wong
  2023-10-04  8:50 ` [PATCH 2/3] ds: cleanup fork + Reset support Eric Wong
  2023-10-04  8:50 ` [PATCH 3/3] ds: make %AWAIT_PIDS a hash, not hashref Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-10-04  8:50 UTC (permalink / raw)
  To: meta

Process shutdown can be chaotic and unpredictable.  Try to make
it more predictable by ensuring any PublicInbox::Select object
can't hold references to any objects.

This should fix the following error I saw in syslog during a deploy:

	Can't call method "FILENO" on an undefined value at
	.../PublicInbox/Select.pm line 34 during global destruction.

Replacing $Poller with PublicInbox::Select (instead of undef-ing
it) means we can avoid adding branches to ->epwait and ->close
before calls to ->ep_mod and ->ep_del, respectively.
---
 lib/PublicInbox/DS.pm | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index d8824a55..e085a010 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -69,7 +69,11 @@ Reset all state
 sub Reset {
 	do {
 		$in_loop = undef; # first in case DESTROY callbacks use this
-		%DescriptorMap = ();
+		# clobbering $Poller may call DSKQXS::DESTROY,
+		# we must always have this set to something to avoid
+		# needing branches before ep_del/ep_mod calls (via ->close).
+		$Poller = PublicInbox::Select->new;
+		%DescriptorMap = (); # likely to call ep_del
 		@Timers = ();
 		%UniqTimer = ();
 		@post_loop_do = ();
@@ -77,8 +81,8 @@ sub Reset {
 		# we may be iterating inside one of these on our stack
 		my @q = delete @Stack{keys %Stack};
 		for my $q (@q) { @$q = () }
-		$AWAIT_PIDS = $nextq = $ToClose = undef;
-		$Poller = undef; # may call DSKQXS::DESTROY
+		$AWAIT_PIDS = $nextq = $ToClose = undef; # may call ep_del
+		$Poller = PublicInbox::Select->new;
 	} while (@Timers || keys(%Stack) || $nextq || $AWAIT_PIDS ||
 		$ToClose || keys(%DescriptorMap) ||
 		@post_loop_do || keys(%UniqTimer));

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] ds: cleanup fork + Reset support
  2023-10-04  8:50 [PATCH 0/3] ds fix and cleanups Eric Wong
  2023-10-04  8:50 ` [PATCH 1/3] ds: Reset: replace Poller object early Eric Wong
@ 2023-10-04  8:50 ` Eric Wong
  2023-10-04  8:50 ` [PATCH 3/3] ds: make %AWAIT_PIDS a hash, not hashref Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-10-04  8:50 UTC (permalink / raw)
  To: meta

We used to have many entries for %Stack, but nowadays it's just
the one used by next_tick, so just replace it with a $cur_runq variable.

I'm reducing reliance on hash keys for things with global scope
to ensure typos can be detected (strict||v5.12 forces us to fix
uses of undeclared variables, but they can't detect typos in
hash keys).
---
 lib/PublicInbox/DS.pm | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index e085a010..8f77b6ad 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -36,11 +36,11 @@ use Errno qw(EAGAIN EINVAL ECHILD);
 use Carp qw(carp croak);
 our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
 
-my %Stack;
 my $nextq; # queue for next_tick
 my $reap_armed;
 my $ToClose; # sockets to close when event loop is done
 our ($AWAIT_PIDS, # pid => [ $callback, @args ]
+	$cur_runq, # only set inside next_tick
      %DescriptorMap,             # fd (num) -> PublicInbox::DS object
      $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
 
@@ -78,14 +78,14 @@ sub Reset {
 		%UniqTimer = ();
 		@post_loop_do = ();
 
-		# we may be iterating inside one of these on our stack
-		my @q = delete @Stack{keys %Stack};
-		for my $q (@q) { @$q = () }
+		# we may be called from an *atfork_child inside next_tick:
+		@$cur_runq = () if $cur_runq;
 		$AWAIT_PIDS = $nextq = $ToClose = undef; # may call ep_del
 		$Poller = PublicInbox::Select->new;
-	} while (@Timers || keys(%Stack) || $nextq || $AWAIT_PIDS ||
+	} while (@Timers || $nextq || $AWAIT_PIDS ||
 		$ToClose || keys(%DescriptorMap) ||
-		@post_loop_do || keys(%UniqTimer));
+		@post_loop_do || keys(%UniqTimer) ||
+		scalar(@{$cur_runq // []})); # do not vivify cur_runq
 
 	$reap_armed = undef;
 	$LoopTimeout = -1;  # no timeout by default
@@ -145,10 +145,9 @@ sub _InitPoller () {
 sub now () { clock_gettime(CLOCK_MONOTONIC) }
 
 sub next_tick () {
-	my $q = $nextq or return;
+	local $cur_runq = $nextq or return;
 	$nextq = undef;
-	$Stack{cur_runq} = $q;
-	for my $obj (@$q) {
+	for my $obj (@$cur_runq) {
 		# avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak:
 		# https://rt.perl.org/Public/Bug/Display.html?id=114340
 		if (blessed($obj)) {
@@ -157,7 +156,6 @@ sub next_tick () {
 			$obj->();
 		}
 	}
-	delete $Stack{cur_runq};
 }
 
 # runs timers and returns milliseconds for next one, or next event loop

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] ds: make %AWAIT_PIDS a hash, not hashref
  2023-10-04  8:50 [PATCH 0/3] ds fix and cleanups Eric Wong
  2023-10-04  8:50 ` [PATCH 1/3] ds: Reset: replace Poller object early Eric Wong
  2023-10-04  8:50 ` [PATCH 2/3] ds: cleanup fork + Reset support Eric Wong
@ 2023-10-04  8:50 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2023-10-04  8:50 UTC (permalink / raw)
  To: meta

This is more persistent than some of the others and we don't
swap it on use (unlike $nextq or $ToClose).  In other words,
it's helpful for communicating that its lifetime expectancy is
close to that of %DescriptorMap and unlike that of queue-type
things such as $ToClose.
---
 lib/PublicInbox/DS.pm  | 15 ++++++++-------
 lib/PublicInbox/LEI.pm |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 8f77b6ad..26cc83f0 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -39,7 +39,7 @@ our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
 my $nextq; # queue for next_tick
 my $reap_armed;
 my $ToClose; # sockets to close when event loop is done
-our ($AWAIT_PIDS, # pid => [ $callback, @args ]
+our (%AWAIT_PIDS, # pid => [ $callback, @args ]
 	$cur_runq, # only set inside next_tick
      %DescriptorMap,             # fd (num) -> PublicInbox::DS object
      $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
@@ -80,9 +80,10 @@ sub Reset {
 
 		# we may be called from an *atfork_child inside next_tick:
 		@$cur_runq = () if $cur_runq;
-		$AWAIT_PIDS = $nextq = $ToClose = undef; # may call ep_del
+		$nextq = $ToClose = undef; # may call ep_del
+		%AWAIT_PIDS = ();
 		$Poller = PublicInbox::Select->new;
-	} while (@Timers || $nextq || $AWAIT_PIDS ||
+	} while (@Timers || $nextq || keys(%AWAIT_PIDS) ||
 		$ToClose || keys(%DescriptorMap) ||
 		@post_loop_do || keys(%UniqTimer) ||
 		scalar(@{$cur_runq // []})); # do not vivify cur_runq
@@ -218,7 +219,7 @@ sub reap_pids {
 	$reap_armed = undef;
 	while (1) {
 		my $pid = waitpid(-1, WNOHANG) or return;
-		if (defined(my $cb_args = delete $AWAIT_PIDS->{$pid})) {
+		if (defined(my $cb_args = delete $AWAIT_PIDS{$pid})) {
 			await_cb($pid, @$cb_args) if $cb_args;
 		} elsif ($pid == -1 && $! == ECHILD) {
 			return requeue(\&dflush); # force @post_loop_do to run
@@ -719,17 +720,17 @@ sub long_response ($$;@) {
 
 sub awaitpid {
 	my ($pid, @cb_args) = @_; # @cb_args = ($cb, @args), $cb may be undef
-	$AWAIT_PIDS->{$pid} = \@cb_args if @cb_args;
+	$AWAIT_PIDS{$pid} = \@cb_args if @cb_args;
 	# provide synchronous API
 	if (defined(wantarray) || (!$in_loop && !@cb_args)) {
 		my $ret = waitpid($pid, 0);
 		if ($ret == $pid) {
-			my $cb_args = delete $AWAIT_PIDS->{$pid};
+			my $cb_args = delete $AWAIT_PIDS{$pid};
 			@cb_args = @$cb_args if !@cb_args && $cb_args;
 			await_cb($pid, @cb_args);
 		} else {
 			carp "waitpid($pid) => $ret ($!)";
-			delete $AWAIT_PIDS->{$pid};
+			delete $AWAIT_PIDS{$pid};
 		}
 		return $ret;
 	} elsif ($in_loop) { # We could've just missed our SIGCHLD, cover it, here:
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 5f3147bf..e300f0a4 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -1302,7 +1302,7 @@ sub can_stay_alive { # PublicInbox::DS::post_loop_do cb
 		}
 	};
 	# returns true: continue, false: stop
-	$n + scalar(keys(%$PublicInbox::DS::AWAIT_PIDS));
+	$n + scalar(keys(%PublicInbox::DS::AWAIT_PIDS));
 }
 
 # lei(1) calls this when it can't connect

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-10-04  8:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-04  8:50 [PATCH 0/3] ds fix and cleanups Eric Wong
2023-10-04  8:50 ` [PATCH 1/3] ds: Reset: replace Poller object early Eric Wong
2023-10-04  8:50 ` [PATCH 2/3] ds: cleanup fork + Reset support Eric Wong
2023-10-04  8:50 ` [PATCH 3/3] ds: make %AWAIT_PIDS a hash, not hashref Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).