From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 760241F69D for ; Thu, 2 Nov 2023 09:35:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1698917743; bh=uVSBRLlnoDG3XUBTkQUkuohghjDszRE78Igw3ymx168=; h=From:To:Subject:Date:In-Reply-To:References:From; b=ETc1xaveEY77MSe5dIQ0OP7lqSry298Ko5iLBPPYKIgvlkRcmssSKxsSH1oaizOOW 8hWSfz1XtbqvD57Uwy1N0LgjYar4xW3Oics/kNL6hKxqb5pg/+5hVY+EhRxMP5EDhP C8BTo1gNc2TtBlqWn/92DoraC+7uNerIg/vnvAWQ= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 10/14] spawn: support PerlIO layer in scalar redirects Date: Thu, 2 Nov 2023 09:35:35 +0000 Message-Id: <20231102093539.2067470-11-e@80x24.org> In-Reply-To: <20231102093539.2067470-1-e@80x24.org> References: <20231102093539.2067470-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We have to deal with UTF-8 data for generating patches, so make it easier to pass Perl utf8 data to git, diff, sdiff, etc. to avoid "Wide character" warnings. 
--- lib/PublicInbox/MailDiff.pm | 3 +-- lib/PublicInbox/SearchIdx.pm | 2 +- lib/PublicInbox/Spawn.pm | 30 ++++++++++++++++++++---------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm index c7b991f1..b1c12d6d 100644 --- a/lib/PublicInbox/MailDiff.pm +++ b/lib/PublicInbox/MailDiff.pm @@ -63,7 +63,6 @@ sub next_smsg ($) { sub emit_msg_diff { my ($bref, $self) = @_; # bref is `git diff' output # will be escaped to `•' in HTML - utf8::decode($$bref); $self->{ctx}->{ibx}->{obfuscate} and obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}"); print { $self->{ctx}->{zfh} } '
' if $self->{nr} > 1;
@@ -77,7 +76,7 @@ sub do_diff {
 	my $dir = "$self->{tmp}/$n";
 	$self->dump_eml($dir, $eml);
 	my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
-	my $opt = { -C => "$self->{tmp}", quiet => 1 };
+	my $opt = { -C => "$self->{tmp}", quiet => 1, 1 => [':utf8', \my $o] };
 	my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
 	$qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
 }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 78519b22..9566b14d 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -353,7 +353,7 @@ sub index_diff ($$$) {
 sub patch_id {
 	my ($self, $sref) = @_;
 	my $git = ($self->{ibx} // $self->{eidx} // $self)->git;
-	my $opt = { 0 => $sref, 2 => \(my $err) };
+	my $opt = { 0 => [ ':utf8', $sref ], 2 => \(my $err) };
 	my $id = run_qx($git->cmd(qw(patch-id --stable)), undef, $opt);
 	warn $err if $err;
 	$id =~ /\A([a-f0-9]{40,})/ ? $1 : undef;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index d3b7ef6f..b0edeb33 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -332,6 +332,18 @@ sub which ($) {
 	undef;
 }
 
+sub scalar_redirect { # back a scalar-ref redirect for $child_fd with a file handle; returns its fd number
+	my ($layer, $opt, $child_fd, $bref) = @_; # $layer: PerlIO layer suffix such as ':utf8', or '' for none
+	open my $fh, '+>'.$layer, undef; # undef filename: anonymous temp file (perlfunc open); NOTE(review): result unchecked here -- confirm autodie covers it
+	$opt->{"fh.$child_fd"} = $fh; # stash the handle in $opt under "fh.<fd>"
+	if ($child_fd == 0) { # fd 0 only: the scalar holds data to be written into the file
+		print $fh $$bref; # goes through $layer, if one was given
+		$fh->flush or die "flush: $!"; # push buffered data out before repositioning
+		sysseek($fh, 0, SEEK_SET); # reposition to offset 0; presumably so the child reads from the start
+	}
+	fileno($fh); # return value: the descriptor number of the handle
+}
+
 sub spawn ($;$$) {
 	my ($cmd, $env, $opt) = @_;
 	my $f = which($cmd->[0]) // die "$cmd->[0]: command not found\n";
@@ -342,15 +354,11 @@ sub spawn ($;$$) {
 	}
 	for my $child_fd (0..2) {
 		my $pfd = $opt->{$child_fd};
-		if ('SCALAR' eq ref($pfd)) {
-			open my $fh, '+>:utf8', undef;
-			$opt->{"fh.$child_fd"} = $fh;
-			if ($child_fd == 0) {
-				print $fh $$pfd;
-				$fh->flush or die "flush: $!";
-				sysseek($fh, 0, SEEK_SET);
-			}
-			$pfd = fileno($fh);
+		if ('ARRAY' eq ref($pfd)) {
+			my ($layer, $bref) = @$pfd;
+			$pfd = scalar_redirect($layer, $opt, $child_fd, $bref)
+		} elsif ('SCALAR' eq ref($pfd)) {
+			$pfd = scalar_redirect('', $opt, $child_fd, $pfd);
 		} elsif (defined($pfd) && $pfd !~ /\A[0-9]+\z/) {
 			my $fd = fileno($pfd) //
 					die "$pfd not an IO GLOB? $!";
@@ -394,7 +402,9 @@ sub read_out_err ($) {
 	for my $fd (1, 2) { # read stdout/stderr
 		my $fh = delete($opt->{"fh.$fd"}) // next;
 		seek($fh, 0, SEEK_SET);
-		${$opt->{$fd}} .= <$fh>;
+		my $dst = $opt->{$fd};
+		$dst = $opt->{$fd} = $dst->[1] if ref($dst) eq 'ARRAY';
+		$$dst .= <$fh>;
 		$fh->error and croak "E: read(FD=$fd): $!";
 	}
 }