unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] initial spawn implementation using vfork
@ 2016-02-27  2:14 Eric Wong
  2016-02-27 11:00 ` [PATCH v2] " Eric Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2016-02-27  2:14 UTC (permalink / raw)
  To: meta

Under Linux, vfork maintains constant performance as
parent process size increases.  fork needs to prepare pages
for copy-on-write, requiring a linear scan of the address
space.
---
 lib/PublicInbox/Spawn.pm   | 145 +++++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/SpawnPP.pm |  33 +++++++++++
 t/spawn.t                  |  53 +++++++++++++++++
 3 files changed, 231 insertions(+)
 create mode 100644 lib/PublicInbox/Spawn.pm
 create mode 100644 lib/PublicInbox/SpawnPP.pm
 create mode 100644 t/spawn.t

diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
new file mode 100644
index 0000000..ecc0aef
--- /dev/null
+++ b/lib/PublicInbox/Spawn.pm
@@ -0,0 +1,145 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spawn;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw/which spawn/;
+
+my $vfork_spawn = <<'VFORK_SPAWN';
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <alloca.h>
+
+#define AV_ALLOCA(av, max) alloca((max = (av_len((av)) + 1)) * sizeof(char *))
+
+static void av2c_copy(char **dst, AV *src, I32 max)
+{
+	I32 i;
+
+	for (i = 0; i < max; i++) {
+		SV **sv = av_fetch(src, i, 0);
+		dst[i] = sv ? SvPV_nolen(*sv) : 0;
+	}
+	dst[max] = 0;
+}
+
+static void *deconst(const char *s)
+{
+	union { const char *in; void *out; } u;
+	u.in = s;
+	return u.out;
+}
+
+/* needs to be safe inside a vfork'ed process */
+static void xerr(const char *msg)
+{
+	struct iovec iov[3];
+	const char *err = strerror(errno); /* should be safe in practice */
+
+	iov[0].iov_base = deconst(msg);
+	iov[0].iov_len = strlen(msg);
+	iov[1].iov_base = deconst(err);
+	iov[1].iov_len = strlen(err);
+	iov[2].iov_base = deconst("\n");
+	iov[2].iov_len = 1;
+	writev(2, iov, 3);
+	_exit(1);
+}
+
+#define REDIR(var,fd) do { \
+	if (var != fd && dup2(var, fd) < 0) \
+		xerr("error redirecting std"#var ": "); \
+} while (0)
+
+/*
+ * unstable internal API.  This was easy to implement but does not
+ * support arbitrary redirects.  It'll be updated depending on
+ * whatever we'll need in the future.
+ * Be sure to update PublicInbox::SpawnPP if this changes
+ */
+int public_inbox_fork_exec(int in, int out, int err,
+			SV *file, SV *cmdref, SV *envref)
+{
+	AV *cmd = (AV *)SvRV(cmdref);
+	AV *env = (AV *)SvRV(envref);
+	const char *filename = SvPV_nolen(file);
+	pid_t pid;
+	char **argv, **envp;
+	I32 max;
+
+	argv = AV_ALLOCA(cmd, max);
+	av2c_copy(argv, cmd, max);
+
+	envp = AV_ALLOCA(env, max);
+	av2c_copy(envp, env, max);
+
+	pid = vfork();
+	if (pid == 0) {
+		REDIR(in, 0);
+		REDIR(out, 1);
+		REDIR(err, 2);
+		execve(filename, argv, envp);
+		_exit(1);
+	}
+
+	return (int)pid;
+}
+VFORK_SPAWN
+
+my $inline_dir = $ENV{PERL_INLINE_DIRECTORY};
+unless (defined $inline_dir && -d $inline_dir && -w _) {
+	$vfork_spawn = undef;
+}
+if (defined $vfork_spawn) {
+	# need 0.64 or later for locking in multi-process env
+	eval 'use Inline 0.64 C => $vfork_spawn';
+	if ($@) {
+		warn "Inline::C failed for vfork: $@\n";
+		$vfork_spawn = undef;
+	}
+}
+
+unless (defined $vfork_spawn) {
+	require PublicInbox::SpawnPP;
+	no warnings 'once';
+	*public_inbox_fork_exec = *PublicInbox::SpawnPP::public_inbox_fork_exec
+}
+
+sub which ($) {
+	my ($file) = @_;
+	foreach my $p (split(':', $ENV{PATH})) {
+		$p .= "/$file";
+		return $p if -x $p;
+	}
+	undef;
+}
+
+sub spawn ($;$$) {
+	my ($cmd, $env, $opts) = @_;
+	my $f = which($cmd->[0]);
+	my @env;
+	$opts ||= {};
+
+	my %env = $opts->{-env} ? () : %ENV;
+	if ($env) {
+		foreach my $k (keys %$env) {
+			my $v = $env->{$k};
+			if (defined $v) {
+				$env{$k} = $v;
+			} else {
+				delete $env{$k};
+			}
+		}
+	}
+	while (my ($k, $v) = each %env) {
+		push @env, "$k=$v";
+	}
+	my $in = $opts->{0} || 0;
+	my $out = $opts->{1} || 1;
+	my $err = $opts->{2} || 2;
+	public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env);
+}
+
+1;
diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm
new file mode 100644
index 0000000..ae552dd
--- /dev/null
+++ b/lib/PublicInbox/SpawnPP.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::SpawnPP;
+use strict;
+use warnings;
+use POSIX qw(dup2);
+
+# Pure Perl implementation for folks that do not use Inline::C
+sub public_inbox_fork_exec ($$$$$$) {
+	my ($in, $out, $err, $f, $cmd, $env) = @_;
+	my $pid = fork;
+	if ($pid == 0) {
+		if ($in != 0) {
+			dup2($in, 0) or die "dup2 failed for stdin: $!";
+		}
+		if ($out != 1) {
+			dup2($out, 1) or die "dup2 failed for stdout: $!";
+		}
+		if ($err != 2) {
+			dup2($err, 2) or die "dup2 failed for stderr$!";
+		}
+		%ENV = ();
+		foreach my $e (@$env) {
+			my ($k, $v) = split('=', $e, 2);
+			$ENV{$k} = $v;
+		}
+		exec @$cmd;
+		exit 1;
+	}
+	$pid;
+}
+
+1;
diff --git a/t/spawn.t b/t/spawn.t
new file mode 100644
index 0000000..ed9b5b0
--- /dev/null
+++ b/t/spawn.t
@@ -0,0 +1,53 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::Spawn qw(which spawn);
+
+{
+	my $true = which('true');
+	ok($true, "'true' command found with which()");
+}
+
+{
+	my $pid = spawn(['true']);
+	ok($pid, 'spawned process');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'true exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['echo', 'hello world'], undef, { 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	is(<$r>, "hello world\n", 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'true exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['sh', '-c', 'echo $HELLO'],
+		{ 'HELLO' => 'world' }, { 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	is(<$r>, "world\n", 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'sh exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['env'], {}, { -env => 1, 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	ok(!defined(<$r>), 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'env(1) exited successfully');
+}
+
+done_testing();
+
+1;
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2] initial spawn implementation using vfork
  2016-02-27  2:14 [PATCH] initial spawn implementation using vfork Eric Wong
@ 2016-02-27 11:00 ` Eric Wong
  2016-02-27 22:19   ` [PATCH 2/1] spawn: fail properly if Inline fails Eric Wong
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Wong @ 2016-02-27 11:00 UTC (permalink / raw)
  To: meta

Under Linux, vfork maintains constant performance as
parent process size increases.  fork needs to prepare pages
for copy-on-write, requiring a linear scan of the address
space.
---
 v2 changes:
 - xerr for execve failure
 - use flock explicitly for older Inline installs on wheezy
 - die early if which() fails

 lib/PublicInbox/Spawn.pm   | 152 +++++++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/SpawnPP.pm |  33 ++++++++++
 t/spawn.t                  |  53 ++++++++++++++++
 3 files changed, 238 insertions(+)
 create mode 100644 lib/PublicInbox/Spawn.pm
 create mode 100644 lib/PublicInbox/SpawnPP.pm
 create mode 100644 t/spawn.t

diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
new file mode 100644
index 0000000..aa8d81b
--- /dev/null
+++ b/lib/PublicInbox/Spawn.pm
@@ -0,0 +1,152 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Spawn;
+use strict;
+use warnings;
+use base qw(Exporter);
+our @EXPORT_OK = qw/which spawn/;
+
+my $vfork_spawn = <<'VFORK_SPAWN';
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <alloca.h>
+
+#define AV_ALLOCA(av, max) alloca((max = (av_len((av)) + 1)) * sizeof(char *))
+
+static void av2c_copy(char **dst, AV *src, I32 max)
+{
+	I32 i;
+
+	for (i = 0; i < max; i++) {
+		SV **sv = av_fetch(src, i, 0);
+		dst[i] = sv ? SvPV_nolen(*sv) : 0;
+	}
+	dst[max] = 0;
+}
+
+static void *deconst(const char *s)
+{
+	union { const char *in; void *out; } u;
+	u.in = s;
+	return u.out;
+}
+
+/* needs to be safe inside a vfork'ed process */
+static void xerr(const char *msg)
+{
+	struct iovec iov[3];
+	const char *err = strerror(errno); /* should be safe in practice */
+
+	iov[0].iov_base = deconst(msg);
+	iov[0].iov_len = strlen(msg);
+	iov[1].iov_base = deconst(err);
+	iov[1].iov_len = strlen(err);
+	iov[2].iov_base = deconst("\n");
+	iov[2].iov_len = 1;
+	writev(2, iov, 3);
+	_exit(1);
+}
+
+#define REDIR(var,fd) do { \
+	if (var != fd && dup2(var, fd) < 0) \
+		xerr("error redirecting std"#var ": "); \
+} while (0)
+
+/*
+ * unstable internal API.  This was easy to implement but does not
+ * support arbitrary redirects.  It'll be updated depending on
+ * whatever we'll need in the future.
+ * Be sure to update PublicInbox::SpawnPP if this changes
+ */
+int public_inbox_fork_exec(int in, int out, int err,
+			SV *file, SV *cmdref, SV *envref)
+{
+	AV *cmd = (AV *)SvRV(cmdref);
+	AV *env = (AV *)SvRV(envref);
+	const char *filename = SvPV_nolen(file);
+	pid_t pid;
+	char **argv, **envp;
+	I32 max;
+
+	argv = AV_ALLOCA(cmd, max);
+	av2c_copy(argv, cmd, max);
+
+	envp = AV_ALLOCA(env, max);
+	av2c_copy(envp, env, max);
+
+	pid = vfork();
+	if (pid == 0) {
+		REDIR(in, 0);
+		REDIR(out, 1);
+		REDIR(err, 2);
+		execve(filename, argv, envp);
+		xerr("execve failed");
+	}
+
+	return (int)pid;
+}
+VFORK_SPAWN
+
+my $inline_dir = $ENV{PERL_INLINE_DIRECTORY};
+$vfork_spawn = undef unless defined $inline_dir && -d $inline_dir && -w _;
+if (defined $vfork_spawn) {
+	# Inline 0.64 or later has locking in multi-process env,
+	# but we support 0.5 on Debian wheezy
+	use Fcntl qw(:flock);
+	eval {
+		my $f = "$inline_dir/.public-inbox.lock";
+		open my $fh, '>', $f or die "failed to open $f: $!\n";
+		flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+		eval 'use Inline C => $vfork_spawn';
+		flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
+	};
+	if ($@) {
+		warn "Inline::C failed for vfork: $@\n";
+		$vfork_spawn = undef;
+	}
+}
+
+unless (defined $vfork_spawn) {
+	require PublicInbox::SpawnPP;
+	no warnings 'once';
+	*public_inbox_fork_exec = *PublicInbox::SpawnPP::public_inbox_fork_exec
+}
+
+sub which ($) {
+	my ($file) = @_;
+	foreach my $p (split(':', $ENV{PATH})) {
+		$p .= "/$file";
+		return $p if -x $p;
+	}
+	undef;
+}
+
+sub spawn ($;$$) {
+	my ($cmd, $env, $opts) = @_;
+	my $f = which($cmd->[0]);
+	defined $f or die "$cmd->[0]: command not found\n";
+	my @env;
+	$opts ||= {};
+
+	my %env = $opts->{-env} ? () : %ENV;
+	if ($env) {
+		foreach my $k (keys %$env) {
+			my $v = $env->{$k};
+			if (defined $v) {
+				$env{$k} = $v;
+			} else {
+				delete $env{$k};
+			}
+		}
+	}
+	while (my ($k, $v) = each %env) {
+		push @env, "$k=$v";
+	}
+	my $in = $opts->{0} || 0;
+	my $out = $opts->{1} || 1;
+	my $err = $opts->{2} || 2;
+	public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env);
+}
+
+1;
diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm
new file mode 100644
index 0000000..ae552dd
--- /dev/null
+++ b/lib/PublicInbox/SpawnPP.pm
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::SpawnPP;
+use strict;
+use warnings;
+use POSIX qw(dup2);
+
+# Pure Perl implementation for folks that do not use Inline::C
+sub public_inbox_fork_exec ($$$$$$) {
+	my ($in, $out, $err, $f, $cmd, $env) = @_;
+	my $pid = fork;
+	if ($pid == 0) {
+		if ($in != 0) {
+			dup2($in, 0) or die "dup2 failed for stdin: $!";
+		}
+		if ($out != 1) {
+			dup2($out, 1) or die "dup2 failed for stdout: $!";
+		}
+		if ($err != 2) {
+			dup2($err, 2) or die "dup2 failed for stderr$!";
+		}
+		%ENV = ();
+		foreach my $e (@$env) {
+			my ($k, $v) = split('=', $e, 2);
+			$ENV{$k} = $v;
+		}
+		exec @$cmd;
+		exit 1;
+	}
+	$pid;
+}
+
+1;
diff --git a/t/spawn.t b/t/spawn.t
new file mode 100644
index 0000000..ed9b5b0
--- /dev/null
+++ b/t/spawn.t
@@ -0,0 +1,53 @@
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use PublicInbox::Spawn qw(which spawn);
+
+{
+	my $true = which('true');
+	ok($true, "'true' command found with which()");
+}
+
+{
+	my $pid = spawn(['true']);
+	ok($pid, 'spawned process');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'true exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['echo', 'hello world'], undef, { 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	is(<$r>, "hello world\n", 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'true exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['sh', '-c', 'echo $HELLO'],
+		{ 'HELLO' => 'world' }, { 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	is(<$r>, "world\n", 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'sh exited successfully');
+}
+
+{
+	my ($r, $w);
+	pipe $r, $w or die "pipe failed: $!";
+	my $pid = spawn(['env'], {}, { -env => 1, 1 => fileno($w) });
+	close $w or die "close pipe[1] failed: $!";
+	ok(!defined(<$r>), 'read stdout of spawned from pipe');
+	is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process');
+	is($?, 0, 'env(1) exited successfully');
+}
+
+done_testing();
+
+1;
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/1] spawn: fail properly if Inline fails
  2016-02-27 11:00 ` [PATCH v2] " Eric Wong
@ 2016-02-27 22:19   ` Eric Wong
  0 siblings, 0 replies; 3+ messages in thread
From: Eric Wong @ 2016-02-27 22:19 UTC (permalink / raw)
  To: meta

We must stash the error correctly when nesting evals, oops :x
---
 lib/PublicInbox/Spawn.pm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index 394a0b4..72cd6c3 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -111,7 +111,9 @@ if (defined $vfork_spawn) {
 		open my $fh, '>', $f or die "failed to open $f: $!\n";
 		flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
 		eval 'use Inline C => $vfork_spawn';
+		my $err = $@;
 		flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n";
+		die $err if $err;
 	};
 	if ($@) {
 		warn "Inline::C failed for vfork: $@\n";
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-02-27 22:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-27  2:14 [PATCH] initial spawn implementation using vfork Eric Wong
2016-02-27 11:00 ` [PATCH v2] " Eric Wong
2016-02-27 22:19   ` [PATCH 2/1] spawn: fail properly if Inline fails Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).