* [PATCH 0/2] header limit and consistency
@ 2020-05-10 6:21 Eric Wong
2020-05-10 6:21 ` [PATCH 1/2] eml: enforce a maximum header length Eric Wong
2020-05-10 6:21 ` [PATCH 2/2] eml: rename limits to match postfix names Eric Wong
0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2020-05-10 6:21 UTC (permalink / raw)
To: meta
We take most of these limits from postfix, so be consistent with
the corresponding option names used by postfix in our internal
code.
Limiting header scanning serves the same purpose as the new
`--max-size' option for -index (and the default matches
postfix).
Eric Wong (2):
eml: enforce a maximum header length
eml: rename limits to match postfix names
lib/PublicInbox/Eml.pm | 41 +++++++++++++++++++++++++++++++++--------
t/eml.t | 29 +++++++++++++++++++++++++++--
2 files changed, 60 insertions(+), 10 deletions(-)
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1/2] eml: enforce a maximum header length
2020-05-10 6:21 [PATCH 0/2] header limit and consistency Eric Wong
@ 2020-05-10 6:21 ` Eric Wong
2020-05-10 6:21 ` [PATCH 2/2] eml: rename limits to match postfix names Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-05-10 6:21 UTC (permalink / raw)
To: meta
While our header processing is more efficient than
Email::*::Header, capping the maximum size for a `m//g' match
still limits memory growth on a header we care for.
Use the same limit as postfix (header_size_limit=102400), since
messages fetched via git/HTTP/NNTP/etc can bypass MTA limits.
---
lib/PublicInbox/Eml.pm | 23 ++++++++++++++++++++++-
t/eml.t | 25 +++++++++++++++++++++++++
2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index f022516c12c..2ccbb6597de 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -41,6 +41,7 @@ $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0;
our $MAXPARTS = 1000; # same as SpamAssassin
our $MAXDEPTH = 20; # seems enough, Perl sucks, here
our $MAXBOUNDLEN = 2048; # same as postfix
+our $header_size_limit = 102400; # same as postfix
my %MIME_ENC = (qp => \&enc_qp, base64 => \&encode_base64);
my %MIME_DEC = (qp => \&dec_qp, base64 => \&decode_base64);
@@ -68,6 +69,22 @@ sub re_memo ($) {
/ismx
}
+sub hdr_truncate ($) {
+ my $len = length($_[0]);
+ substr($_[0], $header_size_limit, $len) = '';
+ my $end = rindex($_[0], "\n");
+ if ($end >= 0) {
+ ++$end;
+ substr($_[0], $end, $len) = '';
+ warn "header of $len bytes truncated to $end bytes\n";
+ } else {
+ $_[0] = '';
+ warn <<EOF
+header of $len bytes without `\\n' within $header_size_limit ignored
+EOF
+ }
+}
+
# compatible with our uses of Email::MIME
sub new {
my $ref = ref($_[1]) ? $_[1] : \(my $cpy = $_[1]);
@@ -81,14 +98,18 @@ sub new {
# likely on *nix
my $hdr = substr($$ref, 0, $pos + 2, ''); # sv_chop on $$ref
chop($hdr); # lower SvCUR
+ hdr_truncate($hdr) if length($hdr) > $header_size_limit;
bless { hdr => \$hdr, crlf => "\n", bdy => $ref }, __PACKAGE__;
} elsif ($$ref =~ /\r?\n(\r?\n)/s) {
my $hdr = substr($$ref, 0, $+[0], ''); # sv_chop on $$ref
substr($hdr, -(length($1))) = ''; # lower SvCUR
+ hdr_truncate($hdr) if length($hdr) > $header_size_limit;
bless { hdr => \$hdr, crlf => $1, bdy => $ref }, __PACKAGE__;
} elsif ($$ref =~ /^[a-z0-9-]+[ \t]*:/ims && $$ref =~ /(\r?\n)\z/s) {
# body is optional :P
- bless { hdr => \($$ref), crlf => $1 }, __PACKAGE__;
+ my $hdr = substr($$ref, 0, $header_size_limit + 1);
+ hdr_truncate($hdr) if length($hdr) > $header_size_limit;
+ bless { hdr => \$hdr, crlf => $1 }, __PACKAGE__;
} else { # nothing useful
my $hdr = $$ref = '';
bless { hdr => \$hdr, crlf => "\n" }, __PACKAGE__;
diff --git a/t/eml.t b/t/eml.t
index 43c735e76b9..d5e8cbcbbba 100644
--- a/t/eml.t
+++ b/t/eml.t
@@ -252,6 +252,31 @@ EOF
'final "\n" preserved on missing epilogue');
}
+if ('header_size_limit stolen from postfix') {
+ local $PublicInbox::Eml::header_size_limit = 4;
+ my @w;
+ local $SIG{__WARN__} = sub { push @w, @_ };
+ my $eml = PublicInbox::Eml->new("a:b\na:d\n\nzz");
+ is_deeply([$eml->header('a')], ['b'], 'no overrun header');
+ is($eml->body_raw, 'zz', 'body not damaged');
+ is($eml->header_obj->as_string, "a:b\n", 'header truncated');
+ is(grep(/truncated/, @w), 1, 'truncation warned');
+
+ $eml = PublicInbox::Eml->new("a:b\na:d\n");
+ is_deeply([$eml->header('a')], ['b'], 'no overrun header w/o body');
+
+ local $PublicInbox::Eml::header_size_limit = 5;
+ $eml = PublicInbox::Eml->new("a:b\r\na:d\r\n\nzz");
+ is_deeply([$eml->header('a')], ['b'], 'no overrun header on CRLF');
+ is($eml->body_raw, 'zz', 'body not damaged');
+
+ @w = ();
+ $eml = PublicInbox::Eml->new("too:long\n");
+ $eml = PublicInbox::Eml->new("too:long\n\n");
+ $eml = PublicInbox::Eml->new("too:long\r\n\r\n");
+ is(grep(/ignored/, @w), 3, 'ignored header warned');
+}
+
if ('maxparts is a feature unique to us') {
my $eml = eml_load 't/psgi_attach.eml';
my @orig;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] eml: rename limits to match postfix names
2020-05-10 6:21 [PATCH 0/2] header limit and consistency Eric Wong
2020-05-10 6:21 ` [PATCH 1/2] eml: enforce a maximum header length Eric Wong
@ 2020-05-10 6:21 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2020-05-10 6:21 UTC (permalink / raw)
To: meta
They're still part of our internal API at this point, but
reusing the same names as those used by postfix makes sense for
now to reduce cognitive overheads of learning new things.
There's no "mime_parts_limit", but the name is consistent
with "mime_nesting_limit".
---
lib/PublicInbox/Eml.pm | 18 +++++++++++-------
t/eml.t | 4 ++--
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 2ccbb6597de..ef401141c13 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -38,9 +38,11 @@ my $MIME_Header = find_encoding('MIME-Header');
use PublicInbox::EmlContentFoo qw(parse_content_type parse_content_disposition);
$PublicInbox::EmlContentFoo::STRICT_PARAMS = 0;
-our $MAXPARTS = 1000; # same as SpamAssassin
-our $MAXDEPTH = 20; # seems enough, Perl sucks, here
-our $MAXBOUNDLEN = 2048; # same as postfix
+our $mime_parts_limit = 1000; # same as SpamAssassin (not in postfix AFAIK)
+
+# the rest of the limit names are taken from postfix:
+our $mime_nesting_limit = 20; # seems enough, Perl sucks, here
+our $mime_boundary_length_limit = 2048; # same as postfix
our $header_size_limit = 102400; # same as postfix
my %MIME_ENC = (qp => \&enc_qp, base64 => \&encode_base64);
@@ -151,7 +153,7 @@ sub ct ($) {
sub mp_descend ($$) {
my ($self, $nr) = @_; # or $once for top-level
my $bnd = ct($self)->{attributes}->{boundary} // return; # single-part
- return if $bnd eq '' || length($bnd) >= $MAXBOUNDLEN;
+ return if $bnd eq '' || length($bnd) >= $mime_boundary_length_limit;
$bnd = quotemeta($bnd);
# "multipart" messages can exist w/o a body
@@ -179,7 +181,7 @@ sub mp_descend ($$) {
# + 3 since we don't want the last part
# processed to include any other excluded
# parts ($nr starts at 1, and I suck at math)
- $MAXPARTS + 3 - $nr);
+ $mime_parts_limit + 3 - $nr);
if (@parts) { # the usual path if we got this far:
undef $bdy; # release memory ASAP if $nr > 0
@@ -218,13 +220,15 @@ sub each_part {
$p = [ $p, 0 ];
my @s; # our virtual stack
my $nr = 0;
- while ((scalar(@{$p->[0]}) || ($p = pop @s)) && ++$nr <= $MAXPARTS) {
+ while ((scalar(@{$p->[0]}) || ($p = pop @s)) &&
+ ++$nr <= $mime_parts_limit) {
++$p->[-1]; # bump index
my (undef, @idx) = @$p;
@idx = (join('.', @idx));
my $depth = ($idx[0] =~ tr/././) + 1;
my $sub = new_sub(undef, \(shift @{$p->[0]}));
- if ($depth < $MAXDEPTH && (my $nxt = mp_descend($sub, $nr))) {
+ if ($depth < $mime_nesting_limit &&
+ (my $nxt = mp_descend($sub, $nr))) {
push(@s, $p) if scalar @{$p->[0]};
$p = [ $nxt, @idx, 0 ];
} else { # a leaf node
diff --git a/t/eml.t b/t/eml.t
index d5e8cbcbbba..c91deb3ab29 100644
--- a/t/eml.t
+++ b/t/eml.t
@@ -282,7 +282,7 @@ if ('maxparts is a feature unique to us') {
my @orig;
$eml->each_part(sub { push @orig, $_[0]->[0] });
- local $PublicInbox::Eml::MAXPARTS = scalar(@orig);
+ local $PublicInbox::Eml::mime_parts_limit = scalar(@orig);
my $i = 0;
$eml->each_part(sub {
my $cur = $_[0]->[0];
@@ -290,7 +290,7 @@ if ('maxparts is a feature unique to us') {
is($cur->body_raw, $prv->body_raw, "part #$i matches");
});
is($i, scalar(@orig), 'maxparts honored');
- $PublicInbox::Eml::MAXPARTS--;
+ $PublicInbox::Eml::mime_parts_limit--;
my @ltd;
$eml->each_part(sub { push @ltd, $_[0]->[0] });
for ($i = 0; $i <= $#ltd; $i++) {
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-05-10 6:21 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-10 6:21 [PATCH 0/2] header limit and consistency Eric Wong
2020-05-10 6:21 ` [PATCH 1/2] eml: enforce a maximum header length Eric Wong
2020-05-10 6:21 ` [PATCH 2/2] eml: rename limits to match postfix names Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).