From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 832551F9FE for ; Fri, 12 Feb 2021 07:05:52 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/3] import_mbox: use MboxReader Date: Fri, 12 Feb 2021 00:05:51 -0700 Message-Id: <20210212070552.13901-3-e@80x24.org> In-Reply-To: <20210212070552.13901-1-e@80x24.org> References: <20210212070552.13901-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: It supports more mbox variants and its trailing newline behavior is probably more correct despite the previous change to PublicInbox::Filter::Vger. --- lib/PublicInbox/InboxWritable.pm | 44 +++++++------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index c3acc4f9..d4a9040f 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -158,49 +158,23 @@ sub import_maildir { $im->done; } -# asctime: From example@example.com Fri Jun 23 02:56:55 2000 -my $from_strict = qr/^From \S+ +\S+ \S+ +\S+ [^:]+:[^:]+:[^:]+ [^:]+/; - -sub mb_add ($$$$) { - my ($im, $variant, $filter, $msg) = @_; - $$msg =~ s/(\r?\n)+\z/$1/s; - if ($variant eq 'mboxrd') { - $$msg =~ s/^>(>*From )/$1/gms; - } elsif ($variant eq 'mboxo') { - $$msg =~ s/^>From /From /gms; - } - my $mime = PublicInbox::Eml->new($msg); +sub _mbox_eml_cb { # MboxReader->mbox* callback + my ($eml, $im, $filter) = @_; if ($filter) { - my $ret = $filter->scrub($mime) or return; + my $ret = $filter->scrub($eml) or return; return if $ret == REJECT(); - $mime = $ret; + $eml = $ret; } - $im->add($mime) + $im->add($eml); } sub import_mbox { my ($self, $fh, 
$variant) = @_; - if ($variant !~ /\A(?:mboxrd|mboxo)\z/) { - die "variant must be 'mboxrd' or 'mboxo'\n"; - } + require PublicInbox::MboxReader; + my $cb = PublicInbox::MboxReader->can($variant) or + die "$variant not supported\n"; my $im = $self->importer(1); - my $prev = undef; - my $msg = ''; - my $filter = $self->filter; - while (defined(my $l = <$fh>)) { - if ($l =~ /$from_strict/o) { - if (!defined($prev) || $prev =~ /^\r?$/) { - mb_add($im, $variant, $filter, \$msg) if $msg; - $msg = ''; - $prev = $l; - next; - } - warn "W[$.] $l\n"; - } - $prev = $l; - $msg .= $l; - } - mb_add($im, $variant, $filter, \$msg) if $msg; + $cb->(undef, $fh, \&_mbox_eml_cb, $im, $self->filter); $im->done; }