From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8DD591F68D for ; Thu, 30 Nov 2023 11:41:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1701344470; bh=EKIhD2LNKOAPLOE78uqEL+DfXqTu7lV1V6BurM2pWUg=; h=From:To:Subject:Date:In-Reply-To:References:From; b=i+73xp55cfIGnORUjSqahZ10oo7jnrX2AiGbXccfgZ5FVx+1iNtw8Tlhy0UyhqEWB t0MO7jyKIaTA5+oKrlUq7o/68GQia+nbMtK1u4/ZcQMsWxzc9kbZ1gNu70OHS2zS4K Ch/SVylXjY3FIzGkie1oA9GLdipiyfwxyrKHiymo= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 09/15] spawn: drop IO layer support from redirects Date: Thu, 30 Nov 2023 11:41:02 +0000 Message-ID: <20231130114109.2577708-10-e@80x24.org> In-Reply-To: <20231130114109.2577708-1-e@80x24.org> References: <20231130114109.2577708-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: When setting up stdin for commands, the write_file API is convenient enough nowadays to not be worth having special support with process spawning. When reading stdout of commands, we should probably be using utf8_maybe everywhere since there'll always be legacy encodings in git repos. Reading regular files with :utf8 also results in worse memory management since the file size cannot be used as a hint. --- lib/PublicInbox/MailDiff.pm | 3 ++- lib/PublicInbox/SearchIdx.pm | 5 ++++- lib/PublicInbox/Spawn.pm | 32 +++++++++++--------------------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm index e4e262ef..125360fe 100644 --- a/lib/PublicInbox/MailDiff.pm +++ b/lib/PublicInbox/MailDiff.pm @@ -65,6 +65,7 @@ sub next_smsg ($) { sub emit_msg_diff { my ($bref, $self) = @_; # bref is `git diff' output require PublicInbox::Hval; + PublicInbox::Hval::utf8_maybe($$bref); # will be escaped to `•' in HTML $self->{ctx}->{ibx}->{obfuscate} and @@ -81,7 +82,7 @@ sub do_diff { my $dir = "$self->{tmp}/$n"; $self->dump_eml($dir, $eml); my $cmd = [ qw(git diff --no-index --no-color -- a), $n ]; - my $opt = { -C => "$self->{tmp}", quiet => 1, 1 => [':utf8', \my $o] }; + my $opt = { -C => "$self->{tmp}", quiet => 1 }; my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt); $qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self); } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 17538027..86c435fd 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -355,8 +355,11 @@ sub index_body_text { my $rd; if ($$sref =~ /^(?:diff|---|\+\+\+) /ms) { # start patch-id in parallel my $git = ($self->{ibx} // $self->{eidx} // $self)->git; + my $fh = PublicInbox::IO::write_file '+>:utf8', undef, $$sref; + $fh->flush or die "flush: $!"; + sysseek($fh, 0, SEEK_SET); $rd = popen_rd($git->cmd(qw(patch-id --stable)), undef, - { 0 => [ ':utf8', $sref ] }); + { 0 => $fh }); } # split off quoted and unquoted blocks: diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index 9c680690..e6b12994 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -332,18 +332,6 @@ sub which ($) { undef; } -sub scalar_redirect { - my ($layer, $opt, $child_fd, $bref) = @_; - open my $fh, '+>'.$layer, undef; - $opt->{"fh.$child_fd"} = $fh; - if ($child_fd == 0) { - print $fh $$bref; - $fh->flush or die "flush: $!"; - sysseek($fh, 0, SEEK_SET); - } - fileno($fh); -} - sub spawn ($;$$) { my ($cmd, $env, $opt) = @_; my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n"; @@ -354,14 +342,18 @@ sub spawn ($;$$) { } for my $child_fd (0..2) { my $pfd = $opt->{$child_fd}; - if ('ARRAY' eq ref($pfd)) { - my ($layer, $bref) = @$pfd; - $pfd = scalar_redirect($layer, $opt, $child_fd, $bref) - } elsif ('SCALAR' eq ref($pfd)) { - $pfd = scalar_redirect('', $opt, $child_fd, $pfd); + if ('SCALAR' eq ref($pfd)) { + open my $fh, '+>', undef; + $opt->{"fh.$child_fd"} = $fh; # for read_out_err + if ($child_fd == 0) { + print $fh $$pfd; + $fh->flush or die "flush: $!"; + sysseek($fh, 0, SEEK_SET); + } + $pfd = fileno($fh); } elsif (defined($pfd) && $pfd !~ /\A[0-9]+\z/) { my $fd = fileno($pfd) // - die "$pfd not an IO GLOB? $!"; + croak "BUG: $pfd not an IO GLOB? $!"; $pfd = $fd; } $rdr[$child_fd] = $pfd // $child_fd; @@ -399,9 +391,7 @@ sub read_out_err ($) { for my $fd (1, 2) { # read stdout/stderr my $fh = delete($opt->{"fh.$fd"}) // next; seek($fh, 0, SEEK_SET); - my $dst = $opt->{$fd}; - $dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY'; - PublicInbox::IO::read_all $fh, 0, $dst + PublicInbox::IO::read_all $fh, undef, $opt->{$fd}; } }