Some minor things to fix some small annoyances; I needed a break from other areas of the code. Eric Wong (5): git: qx: avoid extra "local" for scalar context case import: check for git->qx errors, clearer return values check defined return value for localized slurp errors ds: simplify EventLoop implementation ds: flatten + reuse @events, epoll_wait style fixes lib/PublicInbox/DS.pm | 49 ++++++++-------------------- lib/PublicInbox/DSKQXS.pm | 2 +- lib/PublicInbox/DSPoll.pm | 3 +- lib/PublicInbox/Gcf2.pm | 2 +- lib/PublicInbox/Git.pm | 6 ++-- lib/PublicInbox/Import.pm | 9 +++--- lib/PublicInbox/Inbox.pm | 9 ++---- lib/PublicInbox/Syscall.pm | 66 +++++++++++++++++++++----------------- script/public-inbox-edit | 3 +- script/public-inbox-init | 6 +--- script/public-inbox-learn | 3 +- script/public-inbox-purge | 2 +- t/ds-poll.t | 28 ++++++++-------- t/epoll.t | 8 ++--- t/git.t | 5 +++ 15 files changed, 92 insertions(+), 109 deletions(-)
We can use the ternary operator to avoid an early return, here --- lib/PublicInbox/Git.pm | 6 ++---- t/git.t | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 08406925..73dc7d3e 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -362,10 +362,8 @@ sub popen { sub qx { my ($self, @cmd) = @_; my $fh = $self->popen(@cmd); - local $/ = "\n"; - return <$fh> if wantarray; - local $/; - <$fh> + local $/ = wantarray ? "\n" : undef; + <$fh>; } # check_async and cat_async may trigger the other, so ensure they're diff --git a/t/git.t b/t/git.t index dfd7173a..dcd053c5 100644 --- a/t/git.t +++ b/t/git.t @@ -79,6 +79,7 @@ if (1) { my @ref = $gcf->qx(qw(cat-file blob), $buf); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); + is(join('', @ref), $ref, 'qx array and scalar context both work'); $gcf->qx(qw(repack -adq)); ok($gcf->packed_bytes > 0, 'packed size is positive');
Those git commands can fail and git->qx will set $? when it fails. There's no need for the extra indirection of the @ret array, either. Improve git->qx coverage to check for $? while we're at it. --- lib/PublicInbox/Import.pm | 9 +++++---- t/git.t | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 2cb4896a..e0a84bfd 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -48,7 +48,7 @@ sub gfi_start { return ($self->{in}, $self->{out}) if $self->{pid}; - my (@ret, $out_r, $out_w); + my ($in_r, $pid, $out_r, $out_w); pipe($out_r, $out_w) or die "pipe failed: $!"; $self->lock_acquire; @@ -56,27 +56,28 @@ sub gfi_start { my ($git, $ref) = @$self{qw(git ref)}; local $/ = "\n"; chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref)); + die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?; if ($self->{path_type} ne '2/38' && $self->{tip}) { local $/ = "\0"; my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref); + die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; chomp @t; $self->{-tree} = { map { $_ => 1 } @t }; } my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(fast-import --quiet --done --date-format=raw)); - my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); + ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); $out_w->autoflush(1); $self->{in} = $in_r; $self->{out} = $out_w; $self->{pid} = $pid; $self->{nchg} = 0; - @ret = ($in_r, $out_w); }; if ($@) { $self->lock_release; die $@; } - @ret; + ($in_r, $out_w); } sub wfail () { die "write to fast-import failed: $!" } diff --git a/t/git.t b/t/git.t index dcd053c5..2cfff248 100644 --- a/t/git.t +++ b/t/git.t @@ -76,13 +76,17 @@ if (1) { is(length($$x), $size, 'read correct number of bytes'); my $ref = $gcf->qx(qw(cat-file blob), $buf); + is($?, 0, 'no error on scalar success'); my @ref = $gcf->qx(qw(cat-file blob), $buf); + is($?, 0, 'no error on wantarray success'); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); is(join('', @ref), $ref, 'qx array and scalar context both work'); $gcf->qx(qw(repack -adq)); ok($gcf->packed_bytes > 0, 'packed size is positive'); + $gcf->qx(qw(rev-parse --verify bogus)); + isnt($?, 0, '$? set on failure'.$?); } SKIP: {
Reading from regular files (even on STDIN) can fail when dealing with flakey storage. --- lib/PublicInbox/Gcf2.pm | 2 +- lib/PublicInbox/Inbox.pm | 9 +++------ script/public-inbox-edit | 3 ++- script/public-inbox-init | 6 +----- script/public-inbox-learn | 3 +-- script/public-inbox-purge | 2 +- 6 files changed, 9 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm index 041dffe7..fe6afef2 100644 --- a/lib/PublicInbox/Gcf2.pm +++ b/lib/PublicInbox/Gcf2.pm @@ -35,7 +35,7 @@ BEGIN { if (open(my $fh, '<', $f)) { chomp($l, $c); local $/; - $c_src = <$fh>; + defined($c_src = <$fh>) or die "read $f: $!\n"; $CFG{LIBS} = $l; $CFG{CCFLAGSEX} = $c; last; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 1b9b56ff..af6380a7 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -210,12 +210,9 @@ sub over { sub try_cat { my ($path) = @_; - my $rv = ''; - if (open(my $fh, '<', $path)) { - local $/; - $rv = <$fh>; - } - $rv; + open(my $fh, '<', $path) or return ''; + local $/; + <$fh> // ''; } sub cat_desc ($) { diff --git a/script/public-inbox-edit b/script/public-inbox-edit index a70614fc..81f023bc 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -183,7 +183,8 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + defined(my $new_raw = do { local $/; <$new_fh> }) or die + "read $edit_fn: $!\n"; if (!$opt->{raw}) { # get rid of the From we added diff --git a/script/public-inbox-init b/script/public-inbox-init index 6d538e43..7ac77830 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -100,11 +100,7 @@ if (-e $pi_config) { defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } + defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 9352c8ff..1731a4ba 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -39,8 +39,7 @@ my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ - local $/; - my $data = <STDIN>; + defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 7bca11ea..52f1f18a 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -32,7 +32,7 @@ if ($opt->{help}) { print $help; exit 0 }; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; <STDIN> }; +defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0;
More importantly, make it easier-to-find the sub by avoiding runtime manipulation of subroutine names. There's no point in avoiding a potential call to _InitPoller in EventLoop since entering EventLoop is rare. On the contrary, PublicInbox::DS->new is called often and this change to avoid entering _InitPoller there may have more benefits (which may still be unmeasurable). --- lib/PublicInbox/DS.pm | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index a02b3bb7..12df5919 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -50,7 +50,6 @@ our ( $PostLoopCallback, # subref to call at the end of each loop, if defined (global) $LoopTimeout, # timeout of event loop in milliseconds - $DoneInit, # if we've done the one-time module init yet @Timers, # timers $in_loop, ); @@ -75,12 +74,9 @@ sub Reset { @Timers = (); $PostLoopCallback = undef; - $DoneInit = 0; $_io = undef; # closes real $Epoll FD $Epoll = undef; # may call DSKQXS::DESTROY - - *EventLoop = *FirstTimeEventLoop; } =head2 C<< CLASS->SetLoopTimeout( $timeout ) >> @@ -137,14 +133,13 @@ sub set_cloexec ($) { fcntl($_io, F_SETFD, $fl | FD_CLOEXEC); } +# caller sets return value to $Epoll sub _InitPoller { - return if $DoneInit; - $DoneInit = 1; - if (PublicInbox::Syscall::epoll_defined()) { - $Epoll = epoll_create(); - set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0); + my $fd = epoll_create(); + set_cloexec($fd) if (defined($fd) && $fd >= 0); + $fd; } else { my $cls; for (qw(DSKQXS DSPoll)) { @@ -152,9 +147,8 @@ sub _InitPoller last if eval "require $cls"; } $cls->import(qw(epoll_ctl epoll_wait)); - $Epoll = $cls->new; + $cls->new; } - *EventLoop = *EpollEventLoop; } =head2 C<< CLASS->EventLoop() >> @@ -163,13 +157,6 @@ Start processing IO events. In most daemon programs this never exits. See C<PostLoopCallback> below for how to exit the loop. =cut -sub FirstTimeEventLoop { - my $class = shift; - - _InitPoller(); - - EventLoop($class); -} sub now () { clock_gettime(CLOCK_MONOTONIC) } @@ -271,7 +258,8 @@ sub PostEventLoop () { $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1; } -sub EpollEventLoop { +sub EventLoop { + $Epoll //= _InitPoller(); local $in_loop = 1; do { my @events; @@ -330,8 +318,7 @@ sub new { $self->{sock} = $sock; my $fd = fileno($sock); - _InitPoller(); - + $Epoll //= _InitPoller(); retry: if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) { if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
Consistently returning the equivalent of pollfd.revents in a portable manner was never worth the effort for us, as we use the same ->event_step callback regardless of POLLIN/POLLOUT/POLLHUP. Being a Perl, @events knows it size and we don't have to return a maximum index for the caller to iterate on. We can also avoid redundant integer coercion ("+0") since we ensure everything is an IV in other places. Finally, vec() is preferable to ("\0" x $size) for resizing buffers because it only needs to write the extended portion and not overwrite the entire buffer. --- lib/PublicInbox/DS.pm | 20 ++++-------- lib/PublicInbox/DSKQXS.pm | 2 +- lib/PublicInbox/DSPoll.pm | 3 +- lib/PublicInbox/Syscall.pm | 66 +++++++++++++++++++++----------------- t/ds-poll.t | 28 ++++++++-------- t/epoll.t | 8 ++--- 6 files changed, 63 insertions(+), 64 deletions(-) diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index 12df5919..97a6f6ef 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -87,9 +87,7 @@ A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return immediately. =cut -sub SetLoopTimeout { - return $LoopTimeout = $_[1] + 0; -} +sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 } =head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >> @@ -200,12 +198,7 @@ sub RunTimers { my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; # -1 is an infinite timeout, so prefer a real timeout - return $timeout if $LoopTimeout == -1; - - # otherwise pick the lower of our regular timeout and time until - # the next timer - return $LoopTimeout if $LoopTimeout < $timeout; - return $timeout; + ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout; } # We can't use waitpid(-1) safely here since it can hit ``, system(), @@ -261,19 +254,18 @@ sub PostEventLoop () { sub EventLoop { $Epoll //= _InitPoller(); local $in_loop = 1; + my @events; do { - my @events; - my $i; my $timeout = RunTimers(); # get up to 1000 events - my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events); - for ($i=0; $i<$evcount; $i++) { + epoll_wait($Epoll, 1000, $timeout, \@events); + for my $fd (@events) { # it's possible epoll_wait returned many events, including some at the end # that ones in the front triggered unregister-interest actions. if we # can't find the %sock entry, it's because we're no longer interested # in that event. - $DescriptorMap{$events[$i]->[0]}->event_step; + $DescriptorMap{$fd}->event_step; } } while (PostEventLoop()); _run_later(); diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm index d1d3fe60..aa2c9168 100644 --- a/lib/PublicInbox/DSKQXS.pm +++ b/lib/PublicInbox/DSKQXS.pm @@ -134,7 +134,7 @@ sub epoll_wait { } } # caller only cares for $events[$i]->[0] - scalar(@$events); + $_ = $_->[0] for @$events; } # kqueue is close-on-fork (not exec), so we must not close it diff --git a/lib/PublicInbox/DSPoll.pm b/lib/PublicInbox/DSPoll.pm index 1d9b51d9..a218f695 100644 --- a/lib/PublicInbox/DSPoll.pm +++ b/lib/PublicInbox/DSPoll.pm @@ -45,14 +45,13 @@ sub epoll_wait { my $fd = $pset[$i++]; my $revents = $pset[$i++] or next; delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT; - push @$events, [ $fd ]; + push @$events, $fd; } my $nevents = scalar @$events; if ($n != $nevents) { warn "BUG? poll() returned $n, but got $nevents"; } } - $n; } 1; diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index e4f00a2a..c403f78a 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -227,38 +227,46 @@ sub epoll_ctl_mod8 { our $epoll_wait_events; our $epoll_wait_size = 0; sub epoll_wait_mod4 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 12 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec); + for (0..$ct - 1) { + # 12-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 8 bytes: epoll_data_t union (first 4 bytes are the fd) + # So we skip the first 4 bytes and take the middle 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 12 * $_ + 4, 4)); + } } sub epoll_wait_mod8 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 16 x $epoll_wait_size; - } - my $ct; - if ($no_deprecated) { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef); - } else { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - } - for (0..$ct-1) { - # 16 byte epoll_event structs, with format: - # 4 byte mask [idx 1] - # 4 byte padding (we put it into idx 2, useless) - # 8 byte data (first 4 bytes are fd, into idx 0) - @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec, + $no_deprecated ? undef : ()); + for (0..$ct - 1) { + # 16-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 4 bytes padding (skipped, useless) + # 8 bytes epoll_data_t union (first 4 bytes are the fd) + # So skip the first 8 bytes, take 4, and ignore the last 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 16 * $_ + 8, 4)); + } } sub signalfd ($$$) { diff --git a/t/ds-poll.t b/t/ds-poll.t index 3771059b..0ee57b69 100644 --- a/t/ds-poll.t +++ b/t/ds-poll.t @@ -16,35 +16,35 @@ pipe($r, $w) or die; pipe($x, $y) or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN'); my $events = []; -my $n = $p->epoll_wait(9, 0, $events); +$p->epoll_wait(9, 0, $events); is_deeply($events, [], 'no events set'); -is($n, 0, 'nothing ready, yet'); is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0, 'add EPOLLOUT|EPOLLONESHOT'); -$n = $p->epoll_wait(9, -1, $events); -is($n, 1, 'got POLLOUT event'); -is($events->[0]->[0], fileno($w), '$w ready'); +$p->epoll_wait(9, -1, $events); +is(scalar(@$events), 1, 'got POLLOUT event'); +is($events->[0], fileno($w), '$w ready'); -$n = $p->epoll_wait(9, 0, $events); -is($n, 0, 'nothing ready after oneshot'); +$p->epoll_wait(9, 0, $events); +is(scalar(@$events), 0, 'nothing ready after oneshot'); is_deeply($events, [], 'no events set after oneshot'); syswrite($w, '1') == 1 or die; for my $t (0..1) { - $n = $p->epoll_wait(9, $t, $events); - is($events->[0]->[0], fileno($r), "level-trigger POLLIN ready #$t"); - is($n, 1, "only event ready #$t"); + $p->epoll_wait(9, $t, $events); + is($events->[0], fileno($r), "level-trigger POLLIN ready #$t"); + is(scalar(@$events), 1, "only event ready #$t"); } syswrite($y, '1') == 1 or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0, 'EPOLLIN|EPOLLONESHOT add'); -is($p->epoll_wait(9, -1, $events), 2, 'epoll_wait has 2 ready'); -my @fds = sort(map { $_->[0] } @$events); +$p->epoll_wait(9, -1, $events); +is(scalar @$events, 2, 'epoll_wait has 2 ready'); +my @fds = sort @$events; my @exp = sort((fileno($r), fileno($x))); is_deeply(\@fds, \@exp, 'got both ready FDs'); is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK'); -$n = $p->epoll_wait(9, 0, $events); -is($n, 0, 'nothing ready after EPOLL_CTL_DEL'); +$p->epoll_wait(9, 0, $events); +is(scalar @$events, 0, 'nothing ready after EPOLL_CTL_DEL'); done_testing; diff --git a/t/epoll.t b/t/epoll.t index b47650e3..a1e73e07 100644 --- a/t/epoll.t +++ b/t/epoll.t @@ -12,11 +12,11 @@ is(epoll_ctl($epfd, EPOLL_CTL_ADD, fileno($w), EPOLLOUT), 0, 'epoll_ctl socket EPOLLOUT'); my @events; -is(epoll_wait($epfd, 100, 10000, \@events), 1, 'epoll_wait returns'); +epoll_wait($epfd, 100, 10000, \@events); is(scalar(@events), 1, 'got one event'); -is($events[0]->[0], fileno($w), 'got expected FD'); -is($events[0]->[1], EPOLLOUT, 'got expected event'); +is($events[0], fileno($w), 'got expected FD'); close $w; -is(epoll_wait($epfd, 100, 0, \@events), 0, 'epoll_wait timeout'); +epoll_wait($epfd, 100, 0, \@events); +is(@events, 0, 'epoll_wait timeout'); done_testing;