From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id CCE0E1FA17 for ; Thu, 31 Dec 2020 13:51:54 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 04/36] lei_to_mail: initial implementation for writing mbox formats Date: Thu, 31 Dec 2020 13:51:22 +0000 Message-Id: <20201231135154.6070-5-e@80x24.org> In-Reply-To: <20201231135154.6070-1-e@80x24.org> References: <20201231135154.6070-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: No Maildir, support, yet, but it'll come. --- MANIFEST | 2 + lib/PublicInbox/LeiToMail.pm | 109 +++++++++++++++++++++++++++++++++++ t/lei_to_mail.t | 65 +++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 lib/PublicInbox/LeiToMail.pm create mode 100644 t/lei_to_mail.t diff --git a/MANIFEST b/MANIFEST index a5ff81cf..12b67e95 100644 --- a/MANIFEST +++ b/MANIFEST @@ -165,6 +165,7 @@ lib/PublicInbox/LEI.pm lib/PublicInbox/LeiExtinbox.pm lib/PublicInbox/LeiSearch.pm lib/PublicInbox/LeiStore.pm +lib/PublicInbox/LeiToMail.pm lib/PublicInbox/LeiXSearch.pm lib/PublicInbox/Linkify.pm lib/PublicInbox/Listener.pm @@ -328,6 +329,7 @@ t/kqnotify.t t/lei-oneshot.t t/lei.t t/lei_store.t +t/lei_to_mail.t t/lei_xsearch.t t/linkify.t t/main-bin/spamc diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm new file mode 100644 index 00000000..b0d4b664 --- /dev/null +++ b/lib/PublicInbox/LeiToMail.pm @@ -0,0 +1,109 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +# Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir +package PublicInbox::LeiToMail; +use strict; +use v5.10.1; +use PublicInbox::Eml; + +my %kw2char = ( # Maildir characters + draft => 'D', + flagged => 'F', + answered => 'R', + seen => 'S' +); + +my %kw2status = ( + flagged => [ 'X-Status' => 'F' ], + answered => [ 'X-Status' => 'A' ], + seen => [ 'Status' => 'R' ], + draft => [ 'X-Status' => 'T' ], +); + +sub _mbox_hdr_buf ($$$) { + my ($eml, $type, $kw) = @_; + $eml->header_set($_) for (qw(Lines Bytes Content-Length)); + my %hdr; # set Status, X-Status + for my $k (@$kw) { + if (my $ent = $kw2status{$k}) { + push @{$hdr{$ent->[0]}}, $ent->[1]; + } else { # X-Label? + warn "TODO: keyword `$k' not supported for mbox\n"; + } + } + while (my ($name, $chars) = each %hdr) { + $eml->header_set($name, join('', sort @$chars)); + } + my $buf = delete $eml->{hdr}; + + # fixup old bug from import (pre-a0c07cba0e5d8b6a) + $$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + + substr($$buf, 0, 0, # prepend From line + "From lei\@$type Thu Jan 1 00:00:00 1970$eml->{crlf}"); + $buf; +} + +sub write_in_full_atomic ($$) { + my ($fh, $buf) = @_; + defined(my $w = syswrite($fh, $$buf)) or die "write: $!"; + $w == length($$buf) or die "short write: $w != ".length($$buf); +} + +sub eml2mboxrd ($;$) { + my ($eml, $kw) = @_; + my $buf = _mbox_hdr_buf($eml, 'mboxrd', $kw); + if (my $bdy = delete $eml->{bdy}) { + $$bdy =~ s/^(>*From )/>$1/gm; + $$buf .= $eml->{crlf}; + substr($$bdy, 0, 0, $$buf); # prepend header + $buf = $bdy; + } + $$buf .= $eml->{crlf}; + $buf; +} + +sub eml2mboxo { + my ($eml, $kw) = @_; + my $buf = _mbox_hdr_buf($eml, 'mboxo', $kw); + if (my $bdy = delete $eml->{bdy}) { + $$bdy =~ s/^From />From /gm; + $$buf .= $eml->{crlf}; + substr($$bdy, 0, 0, $$buf); # prepend header + $buf = $bdy; + } + $$buf .= $eml->{crlf}; + $buf; +} + +# mboxcl still escapes "From " lines +sub eml2mboxcl { + my ($eml, $kw) = @_; + my $buf = _mbox_hdr_buf($eml, 'mboxcl', $kw); + my $crlf = $eml->{crlf}; + if (my $bdy = delete $eml->{bdy}) { + $$bdy =~ s/^From />From /gm; + $$buf .= 'Content-Length: '.length($$bdy).$crlf.$crlf; + substr($$bdy, 0, 0, $$buf); # prepend header + $buf = $bdy; + } + $$buf .= $crlf; + $buf; +} + +# mboxcl2 has no "From " escaping +sub eml2mboxcl2 { + my ($eml, $kw) = @_; + my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $kw); + my $crlf = $eml->{crlf}; + if (my $bdy = delete $eml->{bdy}) { + $$buf .= 'Content-Length: '.length($$bdy).$crlf.$crlf; + substr($$bdy, 0, 0, $$buf); # prepend header + $buf = $bdy; + } + $$buf .= $crlf; + $buf; +} + +1; diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t new file mode 100644 index 00000000..089a422e --- /dev/null +++ b/t/lei_to_mail.t @@ -0,0 +1,65 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ +use strict; +use v5.10.1; +use Test::More; +use PublicInbox::TestCommon; +use PublicInbox::Eml; +use_ok 'PublicInbox::LeiToMail'; +my $from = "Content-Length: 10\nSubject: x\n\nFrom hell\n"; +my $noeol = "Subject: x\n\nFrom hell"; +my $crlf = $noeol; +$crlf =~ s/\n/\r\n/g; +my $kw = [qw(seen answered flagged)]; +for my $mbox (qw(mboxrd mboxo mboxcl mboxcl2)) { + my $m = "eml2$mbox"; + my $cb = PublicInbox::LeiToMail->can($m); + my $s = $cb->(PublicInbox::Eml->new($from), $kw); + is(substr($$s, -1, 1), "\n", "trailing LF in normal $mbox"); + my $eml = PublicInbox::Eml->new($s); + is($eml->header('Status'), 'R', "Status: set by $m"); + is($eml->header('X-Status'), 'AF', "X-Status: set by $m"); + if ($mbox eq 'mboxcl2') { + like($eml->body_raw, qr/^From /, "From not escaped $m"); + } else { + like($eml->body_raw, qr/^>From /, "From escaped once by $m"); + } + my @cl = $eml->header('Content-Length'); + if ($mbox =~ /mboxcl/) { + is(scalar(@cl), 1, "$m only has one Content-Length header"); + is($cl[0] + length("\n"), + length($eml->body_raw), "$m Content-Length matches"); + } else { + is(scalar(@cl), 0, "$m clobbered Content-Length"); + } + $s = $cb->(PublicInbox::Eml->new($noeol), $kw); + is(substr($$s, -1, 1), "\n", + "trailing LF added by $m when original lacks EOL"); + $eml = PublicInbox::Eml->new($s); + if ($mbox eq 'mboxcl2') { + is($eml->body_raw, "From hell\n", "From not escaped by $m"); + } else { + is($eml->body_raw, ">From hell\n", "From escaped once by $m"); + } + $s = $cb->(PublicInbox::Eml->new($crlf), $kw); + is(substr($$s, -2, 2), "\r\n", + "trailing CRLF added $m by original lacks EOL"); + $eml = PublicInbox::Eml->new($s); + if ($mbox eq 'mboxcl2') { + is($eml->body_raw, "From hell\r\n", "From not escaped by $m"); + } else { + is($eml->body_raw, ">From hell\r\n", "From escaped once by $m"); + } + if ($mbox =~ /mboxcl/) { + is($eml->header('Content-Length') + length("\r\n"), + length($eml->body_raw), "$m Content-Length matches"); + } elsif ($mbox eq 'mboxrd') { + $s = $cb->($eml, $kw); + $eml = PublicInbox::Eml->new($s); + is($eml->body_raw, + ">>From hell\r\n\r\n", "From escaped again by $m"); + } +} + +done_testing;