From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id C4E0E1FA04 for ; Sat, 16 May 2020 10:03:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/2] t/psgi_attach: assert message/* parts are downloadable Date: Sat, 16 May 2020 10:03:21 +0000 Message-Id: <20200516100322.19793-2-e@yhbt.net> In-Reply-To: <20200516100322.19793-1-e@yhbt.net> References: <20200516100322.19793-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll be adding support to descend into message/rfc822 (and legacy message/news) attachments. First, we must ensure existing message/rfc822 attachments can be downloaded and remain downloadable in future commits. --- MANIFEST | 1 + t/data/message_embed.eml | 163 +++++++++++++++++++++++++++++++++++++++ t/psgi_attach.t | 18 +++++ 3 files changed, 182 insertions(+) create mode 100644 t/data/message_embed.eml diff --git a/MANIFEST b/MANIFEST index 7997bc9906c..24f95faa942 100644 --- a/MANIFEST +++ b/MANIFEST @@ -226,6 +226,7 @@ t/config_limiter.t t/content_hash.t t/convert-compact.t t/data/0001.patch +t/data/message_embed.eml t/ds-kqxs.t t/ds-leak.t t/ds-poll.t diff --git a/t/data/message_embed.eml b/t/data/message_embed.eml new file mode 100644 index 00000000000..a7aa88acee3 --- /dev/null +++ b/t/data/message_embed.eml @@ -0,0 +1,163 @@ +Received: from localhost (dcvr.yhbt.net [127.0.0.1]) + by dcvr.yhbt.net (Postfix) with ESMTP id 977481F45A; + Sat, 18 Apr 2020 22:25:08 +0000 (UTC) +Date: Sat, 18 Apr 2020 22:25:08 +0000 +From: Eric Wong +To: test@public-inbox.org +Subject: Re: embedded message test +Message-ID: <20200418222508.GA13918@dcvr> +References: <20200418222020.GA2745@dcvr> +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="TB36FDmn/VVEgNH/" +Content-Disposition: inline +In-Reply-To: <20200418222020.GA2745@dcvr> + + +--TB36FDmn/VVEgNH/ +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline + +testing embedded message harder + +--TB36FDmn/VVEgNH/ +Content-Type: message/rfc822 +Content-Disposition: attachment; filename="embed2x.eml" + +Date: Sat, 18 Apr 2020 22:20:20 +0000 +From: Eric Wong +To: test@public-inbox.org +Subject: embedded message test +Message-ID: <20200418222020.GA2745@dcvr> +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="/04w6evG8XlLl3ft" +Content-Disposition: inline + +--/04w6evG8XlLl3ft +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline + +testing embedded message + +--/04w6evG8XlLl3ft +Content-Type: message/rfc822 +Content-Disposition: attachment; filename="test.eml" + +From: Eric Wong +To: spew@80x24.org +Subject: [PATCH] mail header experiments +Date: Sat, 18 Apr 2020 21:41:14 +0000 +Message-Id: <20200418214114.7575-1-e@yhbt.net> +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit + +--- + lib/PublicInbox/MailHeader.pm | 55 +++++++++++++++++++++++++++++++++++ + t/mail_header.t | 31 ++++++++++++++++++++ + 2 files changed, 86 insertions(+) + create mode 100644 lib/PublicInbox/MailHeader.pm + create mode 100644 t/mail_header.t + +diff --git a/lib/PublicInbox/MailHeader.pm b/lib/PublicInbox/MailHeader.pm +new file mode 100644 +index 00000000..166baf91 +--- /dev/null ++++ b/lib/PublicInbox/MailHeader.pm +@@ -0,0 +1,55 @@ ++# Copyright (C) 2020 all contributors ++# License: AGPL-3.0+ ++package PublicInbox::MailHeader; ++use strict; ++use HTTP::Parser::XS qw(parse_http_response HEADERS_AS_ARRAYREF); ++use bytes (); #bytes::length ++my %casemap; ++ ++sub _headerx_to_list { ++ my (undef, $head, $crlf) = @_; ++ ++ # picohttpparser uses `int' as the return value, so the ++ # actual limit is 2GB on most platforms. However, headers ++ # exceeding (or even close to) 1MB seems unreasonable ++ die 'headers too big' if bytes::length($$head) > 0x100000; ++ my ($ret, undef, undef, undef, $headers) = ++ parse_http_response('HTTP/1.0 1 X'. $crlf . $$head, ++ HEADERS_AS_ARRAYREF); ++ die 'failed to parse headers' if $ret <= 0; ++ # %casemap = map {; lc($_) => $_ } ($$head =~ m/^([^:]+):/gsm); ++ # my $nr = @$headers; ++ for (my $i = 0; $i < @$headers; $i += 2) { ++ my $key = $headers->[$i]; # = $casemap{$headers->[$i]}; ++ my $val = $headers->[$i + 1]; ++ (my $trimmed = $val) =~ s/\r?\n\s+/ /; ++ $headers->[$i + 1] = [ ++ $trimmed, ++ "$key: $val" ++ ] ++ } ++ $headers; ++} ++ ++sub _header_to_list { ++ my (undef, $head, $crlf) = @_; ++ my @tmp = ($$head =~ m/^(([^ \t:][^:\n]*):[ \t]* ++ ([^\n]*\n(?:[ \t]+[^\n]*\n)*))/gsmx); ++ my @headers; ++ $#headers = scalar @tmp; ++ @headers = (); ++ while (@tmp) { ++ my ($orig, $key, $val) = splice(@tmp, 0, 3); ++ # my $v = $tmp[$i + 2]; ++ # $v =~ s/\r?\n[ \t]+/ /sg; ++ # $v =~ s/\r?\n\z//s; ++ $val =~ s/\n[ \t]+/ /sg; ++ chomp($val, $orig); ++ # $val =~ s/\r?\n\z//s; ++ # $orig =~ s/\r?\n\z//s; ++ push @headers, $key, [ $val, $orig ]; ++ } ++ \@headers; ++} ++ ++1; +diff --git a/t/mail_header.t b/t/mail_header.t +new file mode 100644 +index 00000000..4dc62c50 +--- /dev/null ++++ b/t/mail_header.t +@@ -0,0 +1,31 @@ ++# Copyright (C) 2020 all contributors ++# License: AGPL-3.0+ ++use strict; ++use Test::More; ++use PublicInbox::TestCommon; ++require_mods('PublicInbox::MailHeader'); ++ ++my $head = <<'EOF'; ++From d0147582e289fdd4cdd84e91d8b0f8ae9c230124 Mon Sep 17 00:00:00 2001 ++From: Eric Wong ++Date: Fri, 17 Apr 2020 09:28:49 +0000 ++Subject: [PATCH] searchthread: reduce indirection by removing container ++ ++EOF ++my $orig = $head; ++use Email::Simple; ++my $xshdr = PublicInbox::MailHeader->_header_to_list(\$head, "\n"); ++my $simpl = Email::Simple::Header->_header_to_list(\$head, "\n"); ++is_deeply($xshdr, $simpl); ++use Benchmark qw(:all); ++my $res = timethese(100000, { ++ pmh => sub { ++ PublicInbox::MailHeader->_header_to_list(\$head, "\n"); ++ }, ++ esh => sub { ++ PublicInbox::MailHeader->_header_to_list(\$head, "\n"); ++ } ++}); ++is($head, $orig); ++use Data::Dumper; diag Dumper($res); ++done_testing; + + +--/04w6evG8XlLl3ft-- + + +--TB36FDmn/VVEgNH/-- diff --git a/t/psgi_attach.t b/t/psgi_attach.t index 9a2b241164a..12f9e6eeecd 100644 --- a/t/psgi_attach.t +++ b/t/psgi_attach.t @@ -15,6 +15,7 @@ use_ok 'PublicInbox::WWW'; use PublicInbox::Import; use PublicInbox::Git; use PublicInbox::Config; +use PublicInbox::Eml; use_ok 'PublicInbox::WwwAttach'; my $config = PublicInbox::Config->new(\<init_bare; my $txt = "plain\ntext\npass\nthrough\n"; my $dot = "dotfile\n"; $im->add(eml_load('t/psgi_attach.eml')); + $im->add(eml_load('t/data/message_embed.eml')); $im->done; my $www = PublicInbox::WWW->new($config); @@ -67,6 +69,22 @@ $im->init_bare; ok(length($dot_res) >= length($dot), 'dot almost matches'); $res = $cb->(GET('/test/Z%40B/4-any-filename.txt')); is($res->content, $dot_res, 'user-specified filename is OK'); + + my $mid = '20200418222508.GA13918@dcvr'; + my $irt = '20200418222020.GA2745@dcvr'; + $res = $cb->(GET("/test/$mid/")); + like($res->content, qr/\bhref="2-embed2x\.eml"/s, + 'href to message/rfc822 attachment visible'); + $res = $cb->(GET("/test/$mid/2-embed2x.eml")); + my $eml = PublicInbox::Eml->new(\($res->content)); + is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ], + 'got attached eml'); + my @subs = $eml->subparts; + is(scalar(@subs), 2, 'attachment had 2 subparts'); + like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm, + '1st attachment is as expected'); + is($subs[1]->header('Content-Type'), 'message/rfc822', + '2nd attachment is as expected'); }); } done_testing();