From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 37AC41FA10 for ; Wed, 13 Oct 2021 10:16:12 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/7] eml: avoid Encode 2.87..3.12 leak Date: Wed, 13 Oct 2021 10:16:08 +0000 Message-Id: <20211013101611.22962-5-e@80x24.org> In-Reply-To: <20211013101611.22962-1-e@80x24.org> References: <20211013101611.22962-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Encode::FB_CROAK leaks memory in old versions of Encode: <https://rt.cpan.org/Public/Bug/Display.html?id=139622> Since I expect there are still many users on old systems and old Perls, we can use "$SIG{__WARN__} = \&croak" here with Encode::FB_WARN to emulate Encode::FB_CROAK behavior. 
--- lib/PublicInbox/Eml.pm | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 0867a016..69c26932 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -28,7 +28,7 @@ package PublicInbox::Eml; use strict; use v5.10.1; use Carp qw(croak); -use Encode qw(find_encoding decode encode); # stdlib +use Encode qw(find_encoding); # stdlib use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge use MIME::Base64 3.05; # Perl 5.10.0 / 5.9.2 use MIME::QuotedPrint 3.05; # ditto @@ -334,9 +334,14 @@ sub body_set { sub body_str_set { my ($self, $body_str) = @_; - my $charset = ct($self)->{attributes}->{charset} or + my $cs = ct($self)->{attributes}->{charset} // croak('body_str was given, but no charset is defined'); - body_set($self, \(encode($charset, $body_str, Encode::FB_CROAK))); + my $enc = find_encoding($cs) // croak "unknown encoding `$cs'"; + $body_str = do { + local $SIG{__WARN__} = \&croak; + $enc->encode($body_str, Encode::FB_WARN); + }; + body_set($self, \$body_str); } sub content_type { scalar header($_[0], 'Content-Type') } @@ -452,15 +457,17 @@ sub body { sub body_str { my ($self) = @_; my $ct = ct($self); - my $charset = $ct->{attributes}->{charset}; - if (!$charset) { - if ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) { + my $cs = $ct->{attributes}->{charset} // do { + ($STR_TYPE{$ct->{type}} && $STR_SUBTYPE{$ct->{subtype}}) and return body($self); - } croak("can't get body as a string for ", join("\n\t", header_raw($self, 'Content-Type'))); - } - decode($charset, body($self), Encode::FB_CROAK); + }; + my $enc = find_encoding($cs) or croak "unknown encoding `$cs'"; + my $tmp = body($self); + # workaround https://rt.cpan.org/Public/Bug/Display.html?id=139622 + local $SIG{__WARN__} = \&croak; + $enc->decode($tmp, Encode::FB_WARN); } sub as_string {