From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 6AA141FAE2 for ; Tue, 27 Mar 2018 11:11:33 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 01/11] import: consolidate mid prepend logic, here Date: Tue, 27 Mar 2018 11:11:22 +0000 Message-Id: <20180327111132.20681-2-e@80x24.org> In-Reply-To: <20180327111132.20681-1-e@80x24.org> References: <20180327111132.20681-1-e@80x24.org> List-Id: This also quiets down warnings from -watch when spam training happens on messages without Message-Id. --- lib/PublicInbox/Import.pm | 31 ++++++++++++++++++++++++------- lib/PublicInbox/V2Writable.pm | 3 +-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 5d116a1..6824fac 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -203,7 +203,7 @@ sub remove { my ($r, $w) = $self->gfi_start; my $tip = $self->{tip}; if ($path_type eq '2/38') { - $path = mid2path(mid_mime($mime)); + $path = mid2path(v1_mid0($mime)); ($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime); return ($err, $cur) if $err; } else { @@ -296,6 +296,28 @@ sub drop_unwanted_headers ($) { $mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS; } +# used by V2Writable, too +sub prepend_mid ($$) { + my ($hdr, $mid0) = @_; + # @cur is likely empty if we need to call this sub, but it could + # have random unparseable crap which we'll preserve, too. + my @cur = $hdr->header_raw('Message-Id'); + $hdr->header_set('Message-Id', "<$mid0>", @cur); +} + +sub v1_mid0 ($) { + my ($mime) = @_; + my $hdr = $mime->header_obj; + my $mids = mids($hdr); + + if (!scalar(@$mids)) { # spam often has no Message-Id + my $mid0 = digest2mid(content_digest($mime)); + prepend_mid($hdr, $mid0); + return $mid0; + } + $mids->[0]; +} + # returns undef on duplicate # returns the :MARK of the most recent commit sub add { @@ -313,12 +335,7 @@ sub add { my $path; if ($path_type eq '2/38') { - my $mids = mids($mime->header_obj); - if (!scalar(@$mids)) { - my $dig = content_digest($mime); - @$mids = (digest2mid($dig)); - } - $path = mid2path($mids->[0]); + $path = mid2path(v1_mid0($mime)); } else { # v2 layout, one file: $path = 'm'; } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 20c2736..b04e6fb 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -173,8 +173,7 @@ sub num_for_harder { $num = $self->{skel}->{mm}->mid_insert($$mid0); } } - my @cur = $hdr->header_raw('Message-Id'); - $hdr->header_set('Message-Id', "<$$mid0>", @cur); + PublicInbox::Import::prepend_mid($hdr, $$mid0); $num; } -- EW