From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 422021F9F3 for ; Sat, 3 Apr 2021 10:48:28 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/5] lei: improve handling of Message-ID-less draft messages Date: Sat, 3 Apr 2021 10:48:26 +0000 Message-Id: <20210403104827.17003-5-e@80x24.org> In-Reply-To: <20210403104827.17003-1-e@80x24.org> References: <20210403104827.17003-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We need a stable fallback time for digest2mid in the presence of messages without Received/Date headers. Furthermore, we must avoid using uninitialized smsg->{mid} when parsing References for draft replies. --- lib/PublicInbox/Import.pm | 6 +++--- lib/PublicInbox/LeiSearch.pm | 2 +- lib/PublicInbox/OverIdx.pm | 6 ++++-- lib/PublicInbox/Smsg.pm | 2 +- t/lei-import.t | 21 +++++++++++++++++++++ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 34738279..46f57e27 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -510,8 +510,8 @@ sub atfork_child { } } -sub digest2mid ($$) { - my ($dig, $hdr) = @_; +sub digest2mid ($$;$) { + my ($dig, $hdr, $fallback_time) = @_; my $b64 = $dig->clone->b64digest; # Make our own URLs nicer: # See "Base 64 Encoding with URL and Filename Safe Alphabet" in RFC4648 @@ -520,7 +520,7 @@ sub digest2mid ($$) { # Add a date prefix to prevent a leading '-' in case that trips # up some tools (e.g. if a Message-ID were a expected as a # command-line arg) - my $dt = msg_datestamp($hdr); + my $dt = msg_datestamp($hdr, $fallback_time); $dt = POSIX::strftime('%Y%m%d%H%M%S', gmtime($dt)); "$dt.$b64" . '@z'; } diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 69ba8303..148aa185 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -58,7 +58,7 @@ sub content_key ($) { qw(Message-ID X-Alt-Message-ID Resent-Message-ID)); unless (@$mids) { $eml->{-lei_fake_mid} = $mids->[0] = - PublicInbox::Import::digest2mid($dig, $eml); + PublicInbox::Import::digest2mid($dig, $eml, 0); } ($chash, $mids); } diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index e1cd31b9..66dec099 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -264,8 +264,10 @@ sub add_overview { $smsg->{lines} = $eml->body_raw =~ tr!\n!\n!; my $mids = mids_for_index($eml); my $refs = $smsg->parse_references($eml, $mids); - $mids->[0] //= $smsg->{mid} //= $eml->{-lei_fake_mid}; - $smsg->{mid} //= ''; + $mids->[0] //= do { + $smsg->{mid} //= ''; + $eml->{-lei_fake_mid}; + }; my $subj = $smsg->{subject}; my $xpath; if ($subj ne '') { diff --git a/lib/PublicInbox/Smsg.pm b/lib/PublicInbox/Smsg.pm index b4cc2ecb..da8ce590 100644 --- a/lib/PublicInbox/Smsg.pm +++ b/lib/PublicInbox/Smsg.pm @@ -76,7 +76,7 @@ sub parse_references ($$$) { return $refs if scalar(@$refs) == 0; # prevent circular references here: - my %seen = ( $smsg->{mid} => 1 ); + my %seen = ( ($smsg->{mid} // '') => 1 ); my @keep; foreach my $ref (@$refs) { if (length($ref) > PublicInbox::MID::MAX_MID_SIZE) { diff --git a/t/lei-import.t b/t/lei-import.t index 99289748..9bb4e1fa 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -79,6 +79,27 @@ is($res->[1], undef, 'only one result'); is($res->[0]->{'m'}, 'k@y', 'got expected message'); is_deeply($res->[0]->{kw}, ['seen'], "`seen' keywords set"); +# no From, Sender, or Message-ID +$eml_str = <<'EOM'; +Subject: draft message with no sender +References: + +No use for a name +EOM +lei_ok([qw(import -F eml -)], undef, { %$lei_opt, 0 => \$eml_str }); +lei_ok(['q', 's:draft message with no sender']); +my $draft_a = json_utf8->decode($lei_out); +ok(!exists $draft_a->[0]->{'m'}, 'no fake mid stored or exposed'); +lei_ok([qw(tag -F eml - +kw:draft)], undef, { %$lei_opt, 0 => \$eml_str }); +lei_ok(['q', 's:draft message with no sender']); +my $draft_b = json_utf8->decode($lei_out); +my $kw = delete $draft_b->[0]->{kw}; +is_deeply($kw, ['draft'], 'draft kw set'); +is_deeply($draft_a, $draft_b, 'fake Message-ID lookup') or + diag explain($draft_a, $draft_b); +lei_ok('blob', '--mail', $draft_b->[0]->{blob}); +is($lei_out, $eml_str, 'draft retrieved by blob'); + # see t/lei_to_mail.t for "import -F mbox*" }); done_testing;