From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 17C191F68D for ; Mon, 13 Nov 2023 13:15:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1699881354; bh=/WNt18g+g+MtipGFEtWMy+Vs6GgJNe45BzkZPt8D/No=; h=From:To:Subject:Date:In-Reply-To:References:From; b=OUo2ZjPbWIKYn5KhRbytc7boi4bTEe2p44kLah/KdSXVSwbqtmNLXnt2Okx/y3IbR 90jMkmbiyAoiaRx1k5RK9EgNM2EIAUQBIwAQvM8QnZxQsvAboxCrsVd8PF14vSKPxy ezAsH7SPjkOXjB69AQlCf0rJ0mn9L5J5KjIvTD60= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/18] treewide: update read_all to avoid eof|close checks Date: Mon, 13 Nov 2023 13:15:41 +0000 Message-Id: <20231113131551.843230-9-e@80x24.org> In-Reply-To: <20231113131551.843230-1-e@80x24.org> References: <20231113131551.843230-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: read_all can be expanded to support FIFOs/pipes/sockets where read-until-EOF behavior is desired. We can also rely on wantarray to support splitting on EOL markers, but it's hard-coded to support only `$/ eq "\n"' since (AFAIK) it's the only way we use the wantarray form `readline'. --- lib/PublicInbox/CodeSearchIdx.pm | 3 +-- lib/PublicInbox/Gcf2.pm | 3 +-- lib/PublicInbox/IO.pm | 18 +++++++++++++----- lib/PublicInbox/LeiInput.pm | 10 ++++------ lib/PublicInbox/LeiMirror.pm | 10 ++++------ lib/PublicInbox/LeiToMail.pm | 3 +-- lib/PublicInbox/Spawn.pm | 4 +--- lib/PublicInbox/TestCommon.pm | 6 +++--- script/public-inbox-learn | 2 +- script/public-inbox-mda | 2 +- script/public-inbox-purge | 2 +- 11 files changed, 31 insertions(+), 32 deletions(-) diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm index ffd443e1..3601176c 100644 --- a/lib/PublicInbox/CodeSearchIdx.pm +++ b/lib/PublicInbox/CodeSearchIdx.pm @@ -627,8 +627,7 @@ sub index_repo { # run_git cb my $repo = delete $git->{-repo} or return index_next($self); my $roots_fh = delete $repo->{roots_fh} // die 'BUG: no {roots_fh}'; seek($roots_fh, 0, SEEK_SET); - chomp(my @roots = <$roots_fh>); - $roots_fh = eof($roots_fh) | close $roots_fh; # detect readline errors + chomp(my @roots = PublicInbox::IO::read_all $roots_fh); if (!@roots) { warn("E: $git->{git_dir} has no root commits\n"); return index_next($self); diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm index 5490049d..e0219b55 100644 --- a/lib/PublicInbox/Gcf2.pm +++ b/lib/PublicInbox/Gcf2.pm @@ -81,8 +81,7 @@ sub add_alt ($$) { # # See https://bugs.debian.org/975607 if (open(my $fh, '<', "$objdir/info/alternates")) { - chomp(my @abs_alt = grep(m!^/!, <$fh>)); - $fh = eof($fh) | close $fh; # detect readline errors + chomp(my @abs_alt = grep m!^/!, PublicInbox::IO::read_all $fh); $gcf2->add_alternate($_) for @abs_alt; } $gcf2->add_alternate($objdir); diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm index 0d303500..11ce9be1 100644 --- a/lib/PublicInbox/IO.pm +++ b/lib/PublicInbox/IO.pm @@ -62,13 +62,21 @@ sub poll_in ($;$) { IO::Poll::_poll($_[1] // -1, fileno($_[0]), my $ev = POLLIN); } -sub read_all ($;$$) { +sub read_all ($;$$$) { # pass $len=0 to read until EOF for :utf8 handles use autodie qw(read); - my ($io, $len, $bref) = @_; + my ($io, $len, $bref, $off) = @_; $bref //= \(my $buf); - my $r = read($io, $$bref, $len //= -s $io); - croak("read($io) ($r != $len)") if $len != $r; - $$bref; + $off //= 0; + my $r = 0; + if (my $left = $len //= -s $io) { # known size (binmode :raw/:unix) + do { # retry for binmode :unix + $r = read($io, $$bref, $left, $off += $r) or croak( + "read($io) premature EOF ($left/$len remain)"); + } while ($left -= $r); + } else { # read until EOF + while (($r = read($io, $$bref, 65536, $off += $r))) {} + } + wantarray ? split(/^/sm, $$bref) : $$bref } sub try_cat ($) { diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm index 68c3c459..daba9a8e 100644 --- a/lib/PublicInbox/LeiInput.pm +++ b/lib/PublicInbox/LeiInput.pm @@ -79,12 +79,10 @@ sub input_net_cb { # imap_each, nntp_each cb sub input_fh { my ($self, $ifmt, $fh, $name, @args) = @_; if ($ifmt eq 'eml') { - my $buf = do { local $/; <$fh> }; - my $ok = defined($buf) ? 1 : 0; - ++$ok if eof($fh); - ++$ok if $fh->close; - $ok == 3 or return $self->{lei}->child_error($?, <<""); -error reading $name: $! (\$?=$?) + my $buf = eval { PublicInbox::IO::read_all $fh, 0 }; + my $e = $@; + return $self->{lei}->child_error($?, <<"") if !$fh->close || $e; +error reading $name: $! (\$?=$?) (\$@=$e) PublicInbox::Eml::strip_from($buf); diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 84266d03..e048a807 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -307,10 +307,9 @@ sub fgrp_update { my $dstfh = delete $fgrp->{dstfh} or return; seek($srcfh, 0, SEEK_SET); seek($dstfh, 0, SEEK_SET); - my %src = map { chomp; split(/\0/) } (<$srcfh>); - my %dst = map { chomp; split(/\0/) } (<$dstfh>); - $srcfh = eof($srcfh) | close $srcfh; # detects readline errors - $dstfh = eof($dstfh) | close $dstfh; # ditto + my %src = map { chomp; split /\0/ } PublicInbox::IO::read_all $srcfh; + my %dst = map { chomp; split /\0/ } PublicInbox::IO::read_all $dstfh; + $srcfh = $dstfh = undef; my $w = start_update_ref($fgrp) or return; my $lei = $fgrp->{lei}; my $ndel; @@ -508,8 +507,7 @@ EOM my $l = File::Spec->abs2rel($alt, File::Spec->rel2abs($o)); open my $fh, '+>>', my $f = "$o/info/alternates"; seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>' - my $seen = grep(/\A\Q$l\E\n/, <$fh>); # error check with eof - eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline + my $seen = grep /\A\Q$l\E\n/, PublicInbox::IO::read_all $fh; print $fh "$l\n" if !$seen; close $fh; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 0d2f586a..2928be45 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -687,8 +687,7 @@ sub _pre_augment_v2 { my $d = "$lei->{ale}->{git}->{git_dir}/objects"; open my $fh, '+>>', my $f = "$dir/git/0.git/objects/info/alternates"; seek($fh, 0, SEEK_SET); # Perl did SEEK_END when it saw '>>' - my $seen = grep(/\A\Q$d\E\n/, <$fh>); - eof($fh) or die "not at `$f' EOF ($!)"; # $! was set by readline + my $seen = grep /\A\Q$d\E\n/, PublicInbox::IO::read_all $fh; print $fh "$d\n" if !$seen; close $fh; } diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index 8cc4dfaf..849438bc 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -396,14 +396,12 @@ sub popen_wr { sub read_out_err ($) { my ($opt) = @_; - local $/; for my $fd (1, 2) { # read stdout/stderr my $fh = delete($opt->{"fh.$fd"}) // next; seek($fh, 0, SEEK_SET); my $dst = $opt->{$fd}; $dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY'; - $$dst .= <$fh>; - $fh = eof($fh) | close $fh; # detects readline errors + PublicInbox::IO::read_all $fh, 0, $dst, length($$dst); } } diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index b84886a0..caf709c2 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -48,16 +48,16 @@ sub require_bsd (;$) { sub xbail (@) { BAIL_OUT join(' ', map { ref() ? (explain($_)) : ($_) } @_) } -sub read_all ($;$$) { +sub read_all ($;$$$) { require PublicInbox::IO; - PublicInbox::IO::read_all($_[0], $_[1], $_[2]) + PublicInbox::IO::read_all($_[0], $_[1], $_[2], $_[3]) } sub eml_load ($) { my ($path, $cb) = @_; open(my $fh, '<', $path); require PublicInbox::Eml; - PublicInbox::Eml->new(\(read_all($fh))); + PublicInbox::Eml->new(\(scalar read_all $fh)); } sub tmpdir (;$) { diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 6a1bc890..a955cdf6 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -42,7 +42,7 @@ local $PublicInbox::Import::DROP_UNIQUE_UNSUB; PublicInbox::Import::load_config($pi_cfg); my $err; my $mime = PublicInbox::Eml->new(do{ - defined(my $data = do { local $/; }) or die "read STDIN: $!\n"; + my $data = PublicInbox::IO::read_all \*STDIN; PublicInbox::Eml::strip_from($data); if ($train ne 'rm') { diff --git a/script/public-inbox-mda b/script/public-inbox-mda index b2e0908d..b463b07b 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -44,7 +44,7 @@ use PublicInbox::Spamcheck; # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); -my $str = do { local $/; }; +my $str = PublicInbox::IO::read_all \*STDIN; PublicInbox::Eml::strip_from($str); $ems->prepare(\$str); my $eml = PublicInbox::Eml->new(\$str); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 8f9b0b16..618cfec4 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -33,7 +33,7 @@ PublicInbox::Admin::do_chdir(delete $opt->{C}); my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -defined(my $data = do { local $/; }) or die "read STDIN: $!\n"; +my $data = PublicInbox::IO::read_all \*STDIN; PublicInbox::Eml::strip_from($data); my $n_purged = 0;