From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9F3FA1F9E0; Fri, 1 May 2020 06:59:04 +0000 (UTC) Date: Fri, 1 May 2020 06:59:04 +0000 From: Eric Wong To: meta@public-inbox.org Subject: [RFC/WIP] msg_iter: make ->each_part method for PublicInbox::MIME Message-ID: <20200501065904.GA6491@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline List-Id: Relying on Email::MIME->subparts is a tad inefficient for a work-in-progress module intended to replace Email::MIME. So move towards using ->each_part as a class-specific iterator which can take advantage of more class-specific optimizations in the yet-to-be-revealed PublicInbox::Eml and PublicInbox::Gmime classes. The msg_iter() sub remains for compatibility with existing 3rd-party scripts/modules which use our small public Perl API and Email::MIME. --- I'm not completely sure about this one, but I'm strongly leaning towards it, since creating ->subparts objects up front is a bit annoying. 
lib/PublicInbox/MIME.pm | 3 +++ lib/PublicInbox/MsgIter.pm | 15 +++++++++++++-- lib/PublicInbox/SolverGit.pm | 4 ++-- lib/PublicInbox/View.pm | 10 +++++----- lib/PublicInbox/WwwAttach.pm | 4 ++-- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm index 456eed64b8c..b795b93b105 100644 --- a/lib/PublicInbox/MIME.pm +++ b/lib/PublicInbox/MIME.pm @@ -24,6 +24,7 @@ use strict; use warnings; use base qw(Email::MIME); use Email::MIME::ContentType; +use PublicInbox::MsgIter (); $Email::MIME::ContentType::STRICT_PARAMS = 0; if ($Email::MIME::VERSION <= 1.937) { @@ -101,4 +102,6 @@ sub parts_multipart { } } +no warnings 'once'; +*each_part = \&PublicInbox::MsgIter::em_each_part; 1; diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index fa25564a5db..cd5a5d99564 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -7,12 +7,12 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT = qw(msg_iter msg_part_text); -use PublicInbox::MIME; +# This becomes PublicInbox::MIME->each_part: # Like Email::MIME::walk_parts, but this is: # * non-recursive # * passes depth and indices to the iterator callback -sub msg_iter ($$;$$) { +sub em_each_part ($$;$$) { my ($mime, $cb, $cb_arg, $do_undef) = @_; my @parts = $mime->subparts; if (@parts) { @@ -36,6 +36,17 @@ sub msg_iter ($$;$$) { } } +# Use this when we may accept Email::MIME from user scripts +# (not just PublicInbox::MIME) +sub msg_iter ($$;$$) { # $_[0] = PublicInbox::MIME/Email::MIME-like obj + my (undef, $cb, $cb_arg, $once) = @_; + if (my $ep = $_[0]->can('each_part')) { # PublicInbox::{MIME,*} + $ep->($_[0], $cb, $cb_arg, $once); + } else { # for compatibility with existing Email::MIME users: + em_each_part($_[0], $cb, $cb_arg, $once); + } +} + sub msg_part_text ($$) { my ($part, $ct) = @_; diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index c32a5baecdb..f718e28cbd5 100644 --- 
a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -14,7 +14,7 @@ use 5.010_001; use File::Temp 0.19 (); # 0.19 for ->newdir use Fcntl qw(SEEK_SET); use PublicInbox::Git qw(git_unquote git_quote); -use PublicInbox::MsgIter qw(msg_iter msg_part_text); +use PublicInbox::MsgIter qw(msg_part_text); use PublicInbox::Qspawn; use PublicInbox::Tmpfile; use URI::Escape qw(uri_escape_utf8); @@ -234,7 +234,7 @@ sub find_extract_diffs ($$$) { my $diffs = []; foreach my $smsg (@$msgs) { $ibx->smsg_mime($smsg) or next; - msg_iter(delete $smsg->{mime}, \&extract_diff, + delete($smsg->{mime})->each_part(\&extract_diff, [$self, $diffs, $pre, $post, $ibx, $smsg], 1); } @$diffs ? $diffs : undef; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 5144a130460..4ede83878a3 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -243,7 +243,7 @@ sub index_entry { # scan through all parts, looking for displayable text $ctx->{mhref} = $mhref; $ctx->{obuf} = \$rv; - msg_iter($mime, \&add_text_body, $ctx, 1); + $mime->each_part(\&add_text_body, $ctx, 1); delete $ctx->{obuf}; # add the footer @@ -474,10 +474,10 @@ sub thread_html_i { # PublicInbox::WwwStream::getline callback } sub multipart_text_as_html { - # ($mime, $ctx) = @_; # msg_iter will do "$_[0] = undef" + # ($mime, $ctx) = @_; # each_part may do "$_[0] = undef" # scan through all parts, looking for displayable text - msg_iter($_[0], \&add_text_body, $_[1], 1); + $_[0]->each_part(\&add_text_body, $_[1], 1); } sub attach_link ($$$$;$) { @@ -515,11 +515,11 @@ EOF undef; } -sub add_text_body { # callback for msg_iter +sub add_text_body { # callback for each_part my ($p, $ctx) = @_; my $upfx = $ctx->{mhref}; my $ibx = $ctx->{-inbox}; - # $p - from msg_iter: [ Email::MIME, depth, @idx ] + # $p - from each_part: [ Email::MIME-like, depth, @idx ] my ($part, $depth, @idx) = @$p; my $ct = $part->content_type || 'text/plain'; my $fn = $part->filename; diff --git a/lib/PublicInbox/WwwAttach.pm 
b/lib/PublicInbox/WwwAttach.pm index f795618ebcf..774b38ae269 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -10,7 +10,7 @@ use Email::MIME::ContentType qw(parse_content_type); use PublicInbox::MIME; use PublicInbox::MsgIter; -sub get_attach_i { # msg_iter callback +sub get_attach_i { # ->each_part callback my ($part, $depth, @idx) = @{$_[0]}; my $res = $_[1]; return if join('.', @idx) ne $res->[3]; # $idx @@ -40,7 +40,7 @@ sub get_attach ($$$) { my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res; $mime = PublicInbox::MIME->new($mime); $res->[3] = $idx; - msg_iter($mime, \&get_attach_i, $res, 1); + $mime->each_part(\&get_attach_i, $res, 1); pop @$res; # cleanup before letting PSGI server see it $res }