From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 51D0F1FB02 for ; Tue, 6 Mar 2018 08:42:44 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 31/34] favor Received: date over Date: header globally Date: Tue, 6 Mar 2018 08:42:39 +0000 Message-Id: <20180306084242.19988-32-e@80x24.org> In-Reply-To: <20180306084242.19988-1-e@80x24.org> References: <20180306084242.19988-1-e@80x24.org> List-Id: The first Received: header is believable since it typically hits the user's mail server and can be treated as relatively trustworthy. We still show the Date: in per-message (permalink) views, which may expose users who have incorrect Date: headers, but all the ISO YYYY-MM-DD dates we display will match what we see. 
--- MANIFEST | 1 + lib/PublicInbox/Import.pm | 35 ++------------------------- lib/PublicInbox/MsgTime.pm | 51 ++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/SearchMsg.pm | 6 +++-- lib/PublicInbox/View.pm | 8 +------ lib/PublicInbox/WwwAtomStream.pm | 5 ++-- scripts/import_vger_from_mbox | 1 - 7 files changed, 61 insertions(+), 46 deletions(-) create mode 100644 lib/PublicInbox/MsgTime.pm diff --git a/MANIFEST b/MANIFEST index 7366aa0..a42b9e1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -73,6 +73,7 @@ lib/PublicInbox/MID.pm lib/PublicInbox/MIME.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MsgIter.pm +lib/PublicInbox/MsgTime.pm lib/PublicInbox/Msgmap.pm lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index ddb63b1..7ba1668 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -12,8 +12,7 @@ use PublicInbox::Spawn qw(spawn); use PublicInbox::MID qw(mid_mime mid2path); use PublicInbox::Address; use PublicInbox::ContentId qw(content_id); -use Date::Parse qw(str2time); -use Time::Zone qw(tz_offset); +use PublicInbox::MsgTime qw(msg_timestamp); sub new { my ($class, $git, $name, $email, $ibx) = @_; @@ -204,37 +203,7 @@ sub remove { sub parse_date ($) { my ($mime) = @_; - my $hdr = $mime->header_obj; - my $date = $hdr->header_raw('Date'); - my ($ts, $zone); - my $mid = $hdr->header_raw('Message-ID'); - if ($date) { - $ts = eval { str2time($date) }; - if ($@) { - warn "bad Date: $date in $mid: $@\n"; - } elsif ($date =~ /\s+([\+\-]\d+)\s*\z/) { - $zone = $1; - } - } - unless ($ts) { - my @recvd = $hdr->header_raw('Received'); - foreach my $r (@recvd) { - $zone = undef; - $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ - \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/osx or next; - $zone = $2; - $ts = eval { str2time($1) } and last; - warn "no date in Received: $r\n"; - } - } - $zone ||= '+0000'; - # "-1200" is the furthest westermost zone offset, - # but git fast-import is liberal so we use "-1400" - 
if ($zone >= 1400 || $zone <= -1400) { - warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; - $zone = '+0000'; - } - $ts = time unless defined $ts; + my ($ts, $zone) = msg_timestamp($mime->header_obj); $ts = 0 if $ts < 0; # git uses unsigned times "$ts $zone"; } diff --git a/lib/PublicInbox/MsgTime.pm b/lib/PublicInbox/MsgTime.pm new file mode 100644 index 0000000..87664f4 --- /dev/null +++ b/lib/PublicInbox/MsgTime.pm @@ -0,0 +1,51 @@ +# Copyright (C) 2018 all contributors +# License: AGPL-3.0+ +package PublicInbox::MsgTime; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw(msg_timestamp); +use Date::Parse qw(str2time); +use Time::Zone qw(tz_offset); + +sub msg_timestamp ($) { + my ($hdr) = @_; # Email::MIME::Header + my ($ts, $zone); + my $mid; + my @recvd = $hdr->header_raw('Received'); + foreach my $r (@recvd) { + $zone = undef; + $r =~ /\s*(\d+\s+[[:alpha:]]+\s+\d{2,4}\s+ + \d+\D\d+(?:\D\d+)\s+([\+\-]\d+))/sx or next; + $zone = $2; + $ts = eval { str2time($1) } and last; + $mid ||= $hdr->header_raw('Message-ID'); + warn "no date in $mid Received: $r\n"; + } + unless (defined $ts) { + my @date = $hdr->header_raw('Date'); + foreach my $d (@date) { + $zone = undef; + $ts = eval { str2time($d) }; + if ($@) { + $mid ||= $hdr->header_raw('Message-ID'); + warn "bad Date: $d in $mid: $@\n"; + } elsif ($d =~ /\s+([\+\-]\d+)\s*\z/) { + $zone = $1; + } + } + } + $ts = time unless defined $ts; + return $ts unless wantarray; + + $zone ||= '+0000'; + # "-1200" is the furthest westermost zone offset, + # but git fast-import is liberal so we use "-1400" + if ($zone >= 1400 || $zone <= -1400) { + warn "bogus TZ offset: $zone, ignoring and assuming +0000\n"; + $zone = '+0000'; + } + ($ts, $zone); +} + +1; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a62a649..23478a2 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -7,9 +7,9 @@ package PublicInbox::SearchMsg; use strict; use 
warnings; use Search::Xapian; -use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean mid_mime/; use PublicInbox::Address; +use PublicInbox::MsgTime qw(msg_timestamp); sub new { my ($class, $mime) = @_; @@ -117,7 +117,9 @@ sub from_name { sub ts { my ($self) = @_; - $self->{ts} ||= eval { str2time($self->{mime}->header('Date')) } || 0; + $self->{ts} ||= eval { + msg_timestamp($self->{mime}->header_obj); + } || 0; } sub to_doc_data { diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index aad6748..f811f4f 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -6,7 +6,7 @@ package PublicInbox::View; use strict; use warnings; -use Date::Parse qw/str2time/; +use PublicInbox::MsgTime qw(msg_timestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape/; @@ -732,12 +732,6 @@ sub load_results { $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] }); } -sub msg_timestamp { - my ($hdr) = @_; - my $ts = eval { str2time($hdr->header('Date')) }; - defined($ts) ? $ts : 0; -} - sub thread_results { my ($msgs, $srch) = @_; require PublicInbox::SearchThread; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index b69de85..bb574a7 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -7,11 +7,11 @@ use strict; use warnings; use POSIX qw(strftime); -use Date::Parse qw(str2time); use Digest::SHA qw(sha1_hex); use PublicInbox::Address; use PublicInbox::Hval qw(ascii_html); use PublicInbox::MID qw/mid_clean mid_escape/; +use PublicInbox::MsgTime qw(msg_timestamp); # called by PSGI server after getline: sub close {} @@ -108,8 +108,7 @@ sub feed_entry { $irt = ''; } my $href = $base . mid_escape($mid) . '/'; - my $date = $hdr->header('Date'); - my $t = eval { str2time($date) } if defined $date; + my $t = msg_timestamp($hdr); my @t = gmtime(defined $t ? 
$t : time); my $updated = feed_updated(@t); diff --git a/scripts/import_vger_from_mbox b/scripts/import_vger_from_mbox index 8f0ec7c..4469887 100644 --- a/scripts/import_vger_from_mbox +++ b/scripts/import_vger_from_mbox @@ -4,7 +4,6 @@ use strict; use warnings; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; -use Date::Parse qw/str2time/; use PublicInbox::MIME; use PublicInbox::Inbox; use PublicInbox::V2Writable; -- EW