From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D86501F670 for ; Sat, 23 Oct 2021 21:53:46 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] cmd_ipc4: retry sendmsg on ENOBUFS/ENOMEM/ETOOMANYREFS Date: Sat, 23 Oct 2021 21:53:46 +0000 Message-Id: <20211023215346.10505-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: I'm seeing ENOBUFS on a RAM-starved system, and slowing the sender down enough for the receiver to drain the buffers seems to work. ENOMEM and ETOOMANYREFS could be in the same boat as ENOBUFS. Watching for POLLOUT events via select/poll/epoll_wait doesn't seem to work, since the kernel can already sleep (or return EAGAIN) for cases where POLLOUT would work. --- lib/PublicInbox/CmdIPC4.pm | 11 ++++++++++- lib/PublicInbox/Spawn.pm | 27 +++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/CmdIPC4.pm b/lib/PublicInbox/CmdIPC4.pm index 74dbf8a1..c3a7f56e 100644 --- a/lib/PublicInbox/CmdIPC4.pm +++ b/lib/PublicInbox/CmdIPC4.pm @@ -17,7 +17,16 @@ no warnings 'once'; my ($sock, $fds, undef, $flags) = @_; my $mh = Socket::MsgHdr->new(buf => $_[2]); $mh->cmsghdr(SOL_SOCKET, SCM_RIGHTS, pack('i' x scalar(@$fds), @$fds)); - Socket::MsgHdr::sendmsg($sock, $mh, $flags); + my $s; + my $try = 0; + do { + $s = Socket::MsgHdr::sendmsg($sock, $mh, $flags); + } while (!defined($s) && + ($!{ENOBUFS} || $!{ENOMEM} || $!{ETOOMANYREFS}) && + (++$try < 50) && + warn "sleeping on sendmsg: $! 
(#$try)\n" and + select(undef, undef, undef, 0.1) == 0); + $s; }; *recv_cmd4 = sub ($$$) { diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index e940d3c9..6ca1ca2a 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -34,6 +34,9 @@ BEGIN { #include #include #include +#include +#include +#include /* some platforms need alloca.h, but some don't */ #if defined(__GNUC__) && !defined(alloca) @@ -162,6 +165,22 @@ int pi_fork_exec(SV *redirref, SV *file, SV *cmdref, SV *envref, SV *rlimref, return (int)pid; } +static int sleep_wait(unsigned *try, int err) +{ + const struct timespec req = { 0, 100000000 }; /* 100ms */ + switch (err) { + case ENOBUFS: case ENOMEM: case ETOOMANYREFS: + if (++*try < 50) { + fprintf(stderr, "sleeping on sendmsg: %s (#%u)\n", + strerror(err), *try); + nanosleep(&req, NULL); + return 1; + } + default: + return 0; + } +} + #if defined(CMSG_SPACE) && defined(CMSG_LEN) #define SEND_FD_CAPA 10 #define SEND_FD_SPACE (SEND_FD_CAPA * sizeof(int)) @@ -180,6 +199,7 @@ SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) AV *fds = (AV *)SvRV(svfds); I32 i, nfds = av_len(fds) + 1; int *fdp; + unsigned try = 0; if (SvOK(data)) { iov.iov_base = SvPV(data, dlen); @@ -207,7 +227,9 @@ SV *send_cmd4(PerlIO *s, SV *svfds, SV *data, int flags) *fdp++ = SvIV(*fd); } } - sent = sendmsg(PerlIO_fileno(s), &msg, flags); + do { + sent = sendmsg(PerlIO_fileno(s), &msg, flags); + } while (sent < 0 && sleep_wait(&try, errno)); return sent >= 0 ? newSViv(sent) : &PL_sv_undef; } @@ -258,9 +280,6 @@ ALL_LIBC #include #include #include -#include -#include -#include void nodatacow_fd(int fd) {