From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/5] use sendmsg w/ MSG_MORE to reduce syscalls
Date: Wed, 19 Jun 2024 23:41:02 +0000 [thread overview]
Message-ID: <20240619234104.80183-4-e@80x24.org> (raw)
In-Reply-To: <20240619234104.80183-1-e@80x24.org>
In places where we made multiple send(..., MSG_MORE) calls in
quick succession, we now use sendmsg(2) to provide the same
semantics with fewer syscalls. While this may be less efficient
inside the kernel for small messages, syscalls are expensive
nowadays and we can avoid userspace copies and large allocations
when streaming large HTTP chunks in /T/, /t/, and t.mbox.gz
endpoints.
This allows *BSD systems lacking MSG_MORE to save some syscalls
when writing HTTP chunked encoding, among other things.
---
MANIFEST | 1 +
lib/PublicInbox/DS.pm | 56 +++++++++++++++++++++++-------------
lib/PublicInbox/DSdeflate.pm | 11 ++++---
lib/PublicInbox/HTTP.pm | 11 ++-----
lib/PublicInbox/IMAP.pm | 14 ++++-----
lib/PublicInbox/Syscall.pm | 37 ++++++++++++++++++------
t/syscall.t | 14 +++++++++
7 files changed, 94 insertions(+), 50 deletions(-)
create mode 100644 t/syscall.t
diff --git a/MANIFEST b/MANIFEST
index 5796e05b..3fc01a43 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -610,6 +610,7 @@ t/solve/bare.patch
t/solver_git.t
t/spamcheck_spamc.t
t/spawn.t
+t/syscall.t
t/tail_notify.t
t/thread-cycle.t
t/thread-index-gap.t
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 17a8a1df..c5f44860 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -35,7 +35,9 @@ use PublicInbox::Select;
use PublicInbox::OnDestroy;
use Errno qw(EAGAIN EINVAL ECHILD);
use Carp qw(carp croak);
+use List::Util qw(sum);
our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
+my $sendmsg_more = PublicInbox::Syscall->can('sendmsg_more');
my $nextq; # queue for next_tick
my $reap_armed;
@@ -471,8 +473,8 @@ sub drop ($@) {
undef;
}
-sub tmpio ($$$) {
- my ($self, $bref, $off) = @_;
+sub tmpio ($$$;@) {
+ my ($self, $bref, $off, @rest) = @_;
my $fh = tmpfile 'wbuf', $self->{sock}, 1 or
return drop $self, "tmpfile $!";
$fh->autoflush(1);
@@ -480,6 +482,7 @@ sub tmpio ($$$) {
my $n = syswrite($fh, $$bref, $len, $off) //
return drop $self, "write ($len): $!";
$n == $len or return drop $self, "wrote $n < $len bytes";
+ @rest and (print $fh @rest or return drop $self, "print rest: $!");
[ $fh, 0 ] # [1] = offset, [2] = length, not set by us
}
@@ -548,30 +551,43 @@ sub write {
}
}
-use constant MSG_MORE => ($^O eq 'linux') ? 0x8000 : 0;
-
-sub msg_more ($$) {
- my $self = $_[0]; # $_[1] = buf
- my ($sock, $wbuf, $n, $nlen, $tmpio);
+sub msg_more ($@) {
+ my $self = shift;
+ my ($sock, $wbuf);
$sock = $self->{sock} or return 1;
$wbuf = $self->{wbuf};
- if (MSG_MORE && (!defined($wbuf) || !scalar(@$wbuf)) &&
+ if ($sendmsg_more && (!defined($wbuf) || !scalar(@$wbuf)) &&
!$sock->can('stop_SSL')) {
- $n = send($sock, $_[1], MSG_MORE);
- if (defined $n) {
- $nlen = length($_[1]) - $n;
- return 1 if $nlen == 0; # all done!
- # queue up the unwritten substring:
- $tmpio = tmpio($self, \($_[1]), $n) or return 0;
- push @{$self->{wbuf}}, $tmpio; # autovivifies
- epwait $sock, EPOLLOUT|EPOLLONESHOT;
- return 0;
+ my ($s, $tip, $tmpio);
+ $s = $sendmsg_more->($sock, @_);
+ if (defined $s) {
+ my $exp = sum(map length, @_);
+ return 1 if $s == $exp;
+ while (@_) {
+ $tip = shift;
+ if ($s >= length($tip)) { # fully written
+ $s -= length($tip);
+ } else { # first partial write
+ $tmpio = tmpio $self, \$tip, $s, @_
+ or return 0;
+ last;
+ }
+ }
+ $tmpio // return drop $self, "BUG: tmpio on $s != $exp";
+ } elsif ($! == EAGAIN) {
+ $tip = shift;
+ $tmpio = tmpio $self, \$tip, 0, @_ or return 0;
+ } else { # client disconnected
+ return $self->close;
}
+ push @{$self->{wbuf}}, $tmpio; # autovivifies
+ epwait $sock, EPOLLOUT|EPOLLONESHOT;
+ 0;
+ } else {
+ # don't redispatch into NNTPdeflate::write
+ PublicInbox::DS::write($self, join('', @_));
}
-
- # don't redispatch into NNTPdeflate::write
- PublicInbox::DS::write($self, \($_[1]));
}
# return true if complete, false if incomplete (or failure)
diff --git a/lib/PublicInbox/DSdeflate.pm b/lib/PublicInbox/DSdeflate.pm
index 539adf0f..a9f9693b 100644
--- a/lib/PublicInbox/DSdeflate.pm
+++ b/lib/PublicInbox/DSdeflate.pm
@@ -91,12 +91,15 @@ sub do_read ($$$$) {
}
# override PublicInbox::DS::msg_more
-sub msg_more ($$) {
- my $self = $_[0];
+sub msg_more ($@) {
+ my $self = shift;
# $_[1] may be a reference or not for ->deflate
- my $err = $zout->deflate($_[1], $zbuf);
- $err == Z_OK or die "->deflate failed $err";
+ my $err;
+ for (@_) {
+ $err = $zout->deflate($_, $zbuf);
+ $err == Z_OK or die "->deflate failed $err";
+ }
1;
}
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index b7728bd2..3ca0d18c 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -213,14 +213,9 @@ sub response_header_write {
# middlewares such as Deflater may write empty strings
sub chunked_write ($$) {
- my $self = $_[0];
- return if $_[1] eq '';
- msg_more($self, sprintf("%x\r\n", length($_[1])));
- msg_more($self, $_[1]);
-
- # use $self->write(\"\n\n") if you care about real-time
- # streaming responses, public-inbox WWW does not.
- msg_more($self, "\r\n");
+ my ($self, $buf) = @_;
+ $buf eq '' or
+ msg_more $self, sprintf("%x\r\n", length($buf)), $buf, "\r\n";
}
sub identity_write ($$) {
diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm
index b12533cb..0eb21b6a 100644
--- a/lib/PublicInbox/IMAP.pm
+++ b/lib/PublicInbox/IMAP.pm
@@ -605,8 +605,7 @@ sub fetch_blob_cb { # called by git->cat_async via ibx_async_cat
sub emit_rfc822 {
my ($self, $k, undef, $bref) = @_;
- $self->msg_more(" $k {" . length($$bref)."}\r\n");
- $self->msg_more($$bref);
+ $self->msg_more(" $k {" . length($$bref)."}\r\n", $$bref);
}
# Mail::IMAPClient::message_string cares about this by default,
@@ -626,20 +625,18 @@ sub emit_flags { $_[0]->msg_more(' FLAGS ()') }
sub emit_envelope {
my ($self, undef, undef, undef, $eml) = @_;
- $self->msg_more(' ENVELOPE '.eml_envelope($eml));
+ $self->msg_more(' ENVELOPE ', eml_envelope($eml));
}
sub emit_rfc822_header {
my ($self, $k, undef, undef, $eml) = @_;
- $self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n");
- $self->msg_more(${$eml->{hdr}});
+ $self->msg_more(" $k {".length(${$eml->{hdr}})."}\r\n", ${$eml->{hdr}});
}
# n.b. this is sorted to be after any emit_eml_new ops
sub emit_rfc822_text {
my ($self, $k, undef, $bref) = @_;
- $self->msg_more(" $k {".length($$bref)."}\r\n");
- $self->msg_more($$bref);
+ $self->msg_more(" $k {".length($$bref)."}\r\n", $$bref);
}
sub emit_bodystructure {
@@ -970,8 +967,7 @@ sub partial_emit ($$$) {
} else {
$len = length($str);
}
- $self->msg_more(" $k {$len}\r\n");
- $self->msg_more($str);
+ $self->msg_more(" $k {$len}\r\n", $str);
}
}
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index 4cbe9623..80b46c19 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -22,7 +22,8 @@ use POSIX qw(ENOENT ENOSYS EINVAL O_NONBLOCK);
use Socket qw(SOL_SOCKET SCM_RIGHTS);
use Config;
our %SIGNUM = (WINCH => 28); # most Linux, {Free,Net,Open}BSD, *Darwin
-our ($INOTIFY, %PACK);
+our ($INOTIFY, %CONST);
+use List::Util qw(sum);
# $VERSION = '0.25'; # Sys::Syscall version
our @EXPORT_OK = qw(epoll_ctl epoll_create epoll_wait
@@ -290,7 +291,8 @@ EOM
BEGIN {
if ($^O eq 'linux') {
- %PACK = (
+ %CONST = (
+ MSG_MORE => 0x8000,
TMPL_cmsg_len => TMPL_size_t,
# cmsg_len, cmsg_level, cmsg_type
SIZEOF_cmsghdr => SIZEOF_int * 2 + SIZEOF_size_t,
@@ -303,7 +305,7 @@ BEGIN {
'i', # msg_flags
);
} elsif ($^O =~ /\A(?:freebsd|openbsd|netbsd|dragonfly)\z/) {
- %PACK = (
+ %CONST = (
TMPL_cmsg_len => 'L', # socklen_t
SIZEOF_cmsghdr => SIZEOF_int * 3,
CMSG_DATA_off => SIZEOF_ptr == 8 ? '@16' : '',
@@ -316,11 +318,12 @@ BEGIN {
)
}
- $PACK{CMSG_ALIGN_size} = SIZEOF_size_t;
- $PACK{SIZEOF_cmsghdr} //= 0;
- $PACK{TMPL_cmsg_len} //= undef;
- $PACK{CMSG_DATA_off} //= undef;
- $PACK{TMPL_msghdr} //= undef;
+ $CONST{CMSG_ALIGN_size} = SIZEOF_size_t;
+ $CONST{SIZEOF_cmsghdr} //= 0;
+ $CONST{TMPL_cmsg_len} //= undef;
+ $CONST{CMSG_DATA_off} //= undef;
+ $CONST{TMPL_msghdr} //= undef;
+ $CONST{MSG_MORE} //= 0;
}
# SFD_CLOEXEC is arch-dependent, so IN_CLOEXEC may be, too
@@ -455,7 +458,7 @@ sub nodatacow_dir {
if (open my $fh, '<', $_[0]) { nodatacow_fh($fh) }
}
-use constant \%PACK;
+use constant \%CONST;
sub CMSG_ALIGN ($) { ($_[0] + CMSG_ALIGN_size - 1) & ~(CMSG_ALIGN_size - 1) }
use constant CMSG_ALIGN_SIZEOF_cmsghdr => CMSG_ALIGN(SIZEOF_cmsghdr);
sub CMSG_SPACE ($) { CMSG_ALIGN($_[0]) + CMSG_ALIGN_SIZEOF_cmsghdr }
@@ -527,6 +530,22 @@ require PublicInbox::CmdIPC4;
}
@ret;
};
+
+*sendmsg_more = sub ($@) {
+ use bytes qw(length substr);
+ my $sock = shift;
+ my $iov = join('', map { pack 'P'.TMPL_size_t, $_, length } @_);
+ my $mh = pack(TMPL_msghdr,
+ undef, 0, # msg_name, msg_namelen (unused)
+ $iov, scalar(@_), # msg_iov, msg_iovlen
+ undef, 0, # msg_control, msg_controllen (unused),
+ 0); # msg_flags (unused)
+ my $s;
+ do {
+ $s = syscall($SYS_sendmsg, fileno($sock), $mh, MSG_MORE);
+ } while ($s < 0 && $!{EINTR});
+ $s < 0 ? undef : $s;
+};
}
1;
diff --git a/t/syscall.t b/t/syscall.t
new file mode 100644
index 00000000..19390d09
--- /dev/null
+++ b/t/syscall.t
@@ -0,0 +1,14 @@
+use v5.12;
+use autodie;
+use Test::More;
+use PublicInbox::Syscall;
+use Socket qw(AF_UNIX SOCK_STREAM);
+my $sendmsg_more = PublicInbox::Syscall->can('sendmsg_more') or
+ plan skip_all => "sendmsg syscalls not defined on $^O";
+
+socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, 0);
+is $sendmsg_more->($s1, 'hello', 'world'), 10, 'sendmsg_more expected size';
+is sysread($s2, my $buf, 11), 10, 'reader got expected size from sendmsg_more';
+is $buf, 'helloworld', 'sendmsg_more sent expected message';
+
+done_testing;
next prev parent reply other threads:[~2024-06-19 23:41 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-19 23:40 [PATCH 0/5] use sendmsg|writev to reduce syscalls Eric Wong
2024-06-19 23:41 ` [PATCH 1/5] ds: remove needless O_APPEND import Eric Wong
2024-06-19 23:41 ` [PATCH 2/5] ds: update indentation to match rest of source Eric Wong
2024-06-19 23:41 ` Eric Wong [this message]
2024-06-19 23:41 ` [PATCH 4/5] http: set Content-Length for simple array responses Eric Wong
2024-06-19 23:41 ` [PATCH 5/5] http: use writev for known Content-Length responses Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240619234104.80183-4-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).