* [PATCH 1/7] search: support multiple From/To/Cc/Subject headers
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [PATCH 2/7] view: display redundant headers in permalink Eric Wong
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
We can easily support searching on messages with
multiple From/To/Cc/Subject headers just like we
do with multiple Message-ID headers.
This matches the normal mutt pager display behavior.
---
lib/PublicInbox/SearchMsg.pm | 4 ++--
t/v2reindex.t | 16 ++++++++++++----
2 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index adadf92e..7561e7f2 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -107,8 +107,8 @@ sub __hdr ($$) {
return $val if defined $val;
my $mime = $self->{mime} or return;
- $val = $mime->header($field);
- $val = '' unless defined $val;
+ my @raw = $mime->header($field);
+ $val = join(', ', @raw);
$val =~ tr/\t\n/ /;
$val =~ tr/\r//d;
$self->{$field} = $val;
diff --git a/t/v2reindex.t b/t/v2reindex.t
index 52711f8f..3e56ddfa 100644
--- a/t/v2reindex.t
+++ b/t/v2reindex.t
@@ -439,7 +439,7 @@ ok(!-d $xap, 'Xapian directories removed again');
my @warn;
local $SIG{__WARN__} = sub { push @warn, @_ };
my %config = %$ibx_config;
- $config{indexlevel} = 'basic';
+ $config{indexlevel} = 'medium';
my $ibx = PublicInbox::Inbox->new(\%config);
my $im = PublicInbox::V2Writable->new($ibx);
my $m3 = PublicInbox::MIME->new(<<'EOF');
@@ -447,7 +447,7 @@ Date: Tue, 24 May 2016 14:34:22 -0700 (PDT)
Message-Id: <20160524.143422.552507610109476444.d@example.com>
To: t@example.com
Cc: c@example.com
-Subject: Re: [PATCH v2 2/2]
+Subject: Re: [PATCH v2 2/2] uno
From: <f@example.com>
In-Reply-To: <1463825855-7363-2-git-send-email-y@example.com>
References: <1463825855-7363-1-git-send-email-y@example.com>
@@ -456,14 +456,14 @@ Date: Wed, 25 May 2016 10:01:51 +0900
From: h@example.com
To: g@example.com
Cc: m@example.com
-Subject: Re: [PATCH]
+Subject: Re: [PATCH] dos
Message-ID: <20160525010150.GD7292@example.com>
References: <1463498133-23918-1-git-send-email-g+r@example.com>
In-Reply-To: <1463498133-23918-1-git-send-email-g+r@example.com>
From: s@example.com
To: h@example.com
Cc: m@example.com
-Subject: [PATCH 12/13]
+Subject: [PATCH 12/13] tres
Date: Wed, 01 Jun 2016 01:32:35 +0300
Message-ID: <1923946.Jvi0TDUXFC@wasted.example.com>
In-Reply-To: <13205049.n7pM8utpHF@wasted.example.com>
@@ -495,6 +495,14 @@ EOF
eval { $im->index_sync({reindex=>1}) };
is($@, '', 'no error from reindexing after reused Message-ID (x3)');
is_deeply(\@warn, [], 'no warnings on reindex');
+
+ my %uniq;
+ for my $s (qw(uno dos tres)) {
+ my $msgs = $ibx->search->query("s:$s");
+ is(scalar(@$msgs), 1, "only one result for `$s'");
+ $uniq{$msgs->[0]->{num}}++;
+ }
+ is_deeply([values %uniq], [3], 'search on different subjects');
}
done_testing();
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/7] view: display redundant headers in permalink
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
2019-10-24 0:12 ` [PATCH 1/7] search: support multiple From/To/Cc/Subject headers Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [PATCH 3/7] view: move '<' and '>' outside <a> Eric Wong
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
Mail headers can contain multiple headers of any type, so ensure
we don't hide any information we're getting in the per-message
permalink views.
This means it's possible to have multiple From, Date, To, Cc,
Subject, and In-Reply-To headers displayed.
The thread indices are a special case, I guess, since we run
out of space on the line if the headers are too long and tools like
mutt only show the first one.
---
lib/PublicInbox/Linkify.pm | 29 +++++++++++++++
lib/PublicInbox/View.pm | 75 ++++++++++++++++++++++----------------
2 files changed, 73 insertions(+), 31 deletions(-)
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 175f8d72..5b83742c 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -89,4 +89,33 @@ sub linkify_2 {
$_[1];
}
+# single pass linkification of <Message-ID@example.com> within $str
+# with $pfx being the URL prefix
+sub linkify_mids {
+ my ($self, $pfx, $str) = @_;
+ $$str =~ s!<([^>]+)>!
+ my $msgid = PublicInbox::Hval->new_msgid($1);
+ my $html = $msgid->as_html;
+ my $href = $msgid->{href};
+ $href = ascii_html($href); # for IDN
+
+ # salt this, as this could be exploited to show
+ # links in the HTML which don't show up in the raw mail.
+ my $key = sha1_hex($html . $SALT);
+ $self->{$key} = [ $href, $html ];
+ '<PI-LINK-'. $key . '>';
+ !ge;
+ $$str = ascii_html($$str);
+ $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+ my $key = $1;
+ my $repl = $_[0]->{$key};
+ if (defined $repl) {
+ "<a\nhref=\"$pfx/$repl->[0]/\">$repl->[1]</a>";
+ } else {
+ # false positive or somebody tried to mess with us
+ $key;
+ }
+ !ge;
+}
+
1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aeb32fc8..1aa014fd 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -190,8 +190,8 @@ sub fold_addresses ($) {
sub _hdr_names_html ($$) {
my ($hdr, $field) = @_;
- my $val = $hdr->header($field) or return '';
- ascii_html(join(', ', PublicInbox::Address::names($val)));
+ my @vals = $hdr->header($field) or return '';
+ ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals))));
}
sub nr_to_s ($$$) {
@@ -643,12 +643,11 @@ sub _msg_html_prepare {
if ($over) {
$ctx->{-upfx} = '../';
}
- my @title;
- my $v;
- if (defined($v = $hdr->header('From'))) {
+ my @title; # (Subject[0], From[0])
+ for my $v ($hdr->header('From')) {
$v = PublicInbox::Hval->new($v);
my @n = PublicInbox::Address::names($v->raw);
- $title[1] = ascii_html(join(', ', @n));
+ $title[1] //= ascii_html(join(', ', @n));
$v = $v->as_html;
if ($obfs_ibx) {
obfuscate_addrs($obfs_ibx, $v);
@@ -657,26 +656,31 @@ sub _msg_html_prepare {
$rv .= "From: $v\n" if $v ne '';
}
foreach my $h (qw(To Cc)) {
- defined($v = $hdr->header($h)) or next;
- fold_addresses($v);
- $v = ascii_html($v);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- $rv .= "$h: $v\n" if $v ne '';
+ for my $v ($hdr->header($h)) {
+ fold_addresses($v);
+ $v = ascii_html($v);
+ obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+ $rv .= "$h: $v\n" if $v ne '';
+ }
}
- if (defined($v = $hdr->header('Subject')) && ($v ne '')) {
- $v = ascii_html($v);
- obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
- if ($over) {
- $rv .= qq(Subject: <a\nhref="#r"\nid=t>$v</a>\n);
- } else {
- $rv .= "Subject: $v\n";
+ my @subj = $hdr->header('Subject');
+ if (@subj) {
+ for my $v (@subj) {
+ $v = ascii_html($v);
+ obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+ $rv .= 'Subject: ';
+ if ($over) {
+ $rv .= qq(<a\nhref="#r"\nid=t>$v</a>\n);
+ } else {
+ $rv .= "$v\n";
+ }
+ $title[0] //= $v;
}
- $title[0] = $v;
} else { # dummy anchor for thread skeleton at bottom of page
$rv .= qq(<a\nhref="#r"\nid=t></a>) if $over;
$title[0] = '(no subject)';
}
- if (defined($v = $hdr->header('Date'))) {
+ for my $v ($hdr->header('Date')) {
$v = ascii_html($v);
obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
$rv .= "Date: $v\n";
@@ -727,8 +731,9 @@ sub thread_skel {
$$dst .= "$nr+ messages / $expand";
$$dst .= qq! <a\nhref="#b">top</a>\n!;
- my $subj = $hdr->header('Subject');
- defined $subj or $subj = '';
+ # nb: mutt only shows the first Subject in the index pane
+ # when multiple Subject: headers are present, so we follow suit:
+ my $subj = $hdr->header('Subject') // '';
$subj = '(no subject)' if $subj eq '';
$ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
$ctx->{cur} = $mid;
@@ -746,21 +751,29 @@ sub thread_skel {
sub _parent_headers {
my ($hdr, $over) = @_;
my $rv = '';
-
- my $refs = references($hdr);
- my $irt = pop @$refs;
- if (defined $irt) {
- my $v = PublicInbox::Hval->new_msgid($irt);
- my $html = $v->as_html;
- my $href = $v->{href};
- $rv .= "In-Reply-To: <";
- $rv .= "<a\nhref=\"../$href/\">$html</a>>\n";
+ my @irt = $hdr->header_raw('In-Reply-To');
+ my $refs;
+ if (@irt) {
+ my $lnk = PublicInbox::Linkify->new;
+ $rv .= "In-Reply-To: $_\n" for @irt;
+ $lnk->linkify_mids('..', \$rv);
+ } else {
+ $refs = references($hdr);
+ my $irt = pop @$refs;
+ if (defined $irt) {
+ my $v = PublicInbox::Hval->new_msgid($irt);
+ my $html = $v->as_html;
+ my $href = $v->{href};
+ $rv .= "In-Reply-To: <";
+ $rv .= "<a\nhref=\"../$href/\">$html</a>>\n";
+ }
}
# do not display References: if search is present,
# we show the thread skeleton at the bottom, instead.
return $rv if $over;
+ $refs //= references($hdr);
if (@$refs) {
@$refs = map { linkify_ref_no_over($_) } @$refs;
$rv .= 'References: '. join("\n\t", @$refs) . "\n";
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/7] view: move '<' and '>' outside <a>
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
2019-10-24 0:12 ` [PATCH 1/7] search: support multiple From/To/Cc/Subject headers Eric Wong
2019-10-24 0:12 ` [PATCH 2/7] view: display redundant headers in permalink Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [PATCH 4/7] view: improve warning for multiple Message-IDs Eric Wong
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
Browsers may underline '<' and '>' in links, which may be
confused with '≤' and '≥'. So have the Message-ID header
display follow what we do with In-Reply-To headers and move the
"<" and ">" outside of <a> in the HTML.
---
lib/PublicInbox/View.pm | 18 +++++++++---------
t/psgi_v2.t | 2 +-
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 1aa014fd..855ad017 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -630,7 +630,6 @@ sub _msg_html_prepare {
my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = '';
my $mids = mids($hdr);
- my $multiple = scalar(@$mids) > 1; # zero, one, infinity
if ($nr == 0) {
if ($more) {
$rv .=
@@ -686,18 +685,19 @@ sub _msg_html_prepare {
$rv .= "Date: $v\n";
}
$ctx->{-title_html} = join(' - ', @title);
- foreach (@$mids) {
- my $mid = PublicInbox::Hval->new_msgid($_) ;
+ if (scalar(@$mids) == 1) { # common case
+ my $mid = PublicInbox::Hval->new_msgid($mids->[0]);
my $mhtml = $mid->as_html;
- if ($multiple) {
+ $rv .= "Message-ID: <$mhtml> ";
+ $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+ } else {
+ foreach (@$mids) {
+ my $mid = PublicInbox::Hval->new_msgid($_);
+ my $mhtml = $mid->as_html;
my $href = $mid->{href};
$rv .= "Message-ID: ";
- $rv .= "<a\nhref=\"../$href/\">";
- $rv .= "<$mhtml></a> ";
+ $rv .= "<<a\nhref=\"../$href/\">$mhtml</a>> ";
$rv .= "(<a\nhref=\"../$href/raw\">raw</a>)\n";
- } else {
- $rv .= "Message-ID: <$mhtml> ";
- $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
}
}
$rv .= _parent_headers($hdr, $over);
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index cb5ece63..c7550e2d 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -168,7 +168,7 @@ test_psgi(sub { $www->call(@_) }, sub {
@from_ = ($raw =~ m/>From: /mg);
is(scalar(@from_), 3, 'three From: lines');
foreach my $mid ('a-mid@b', $new_mid, $third) {
- like($raw, qr/<\Q$mid\E>/s, "Message-ID $mid shown");
+ like($raw, qr!>\Q$mid\E</a>!s, "Message-ID $mid shown");
}
like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/7] view: improve warning for multiple Message-IDs
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
` (2 preceding siblings ...)
2019-10-24 0:12 ` [PATCH 3/7] view: move '<' and '>' outside <a> Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [PATCH 5/7] linkify: support adding "(raw)" link for Message-IDs Eric Wong
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
"refer" is not the correct term, here; since that would mean
multiple messages have the current message in the "References:"
header, and that's a normal occurrence.
Instead, we need to warn the reader that the given message
itself has multiple Message-IDs.
---
lib/PublicInbox/View.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 855ad017..ff55596d 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -633,7 +633,7 @@ sub _msg_html_prepare {
if ($nr == 0) {
if ($more) {
$rv .=
-"<pre>WARNING: multiple messages refer to this Message-ID\n</pre>";
+"<pre>WARNING: multiple messages have this Message-ID\n</pre>";
}
$rv .= "<pre\nid=b>"; # anchor for body start
} else {
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/7] linkify: support adding "(raw)" link for Message-IDs
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
` (3 preceding siblings ...)
2019-10-24 0:12 ` [PATCH 4/7] view: improve warning for multiple Message-IDs Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [RFC 6/7] index: allow search/lookups on X-Alt-Message-ID Eric Wong
2019-10-24 0:12 ` [RFC 7/7] view: show X-Alt-Message-ID in permalink view, too Eric Wong
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
And use it for the per-message permalink display.
---
lib/PublicInbox/Linkify.pm | 10 ++++++----
lib/PublicInbox/View.pm | 13 +++++--------
2 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 5b83742c..af9be3ff 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -92,7 +92,7 @@ sub linkify_2 {
# single pass linkification of <Message-ID@example.com> within $str
# with $pfx being the URL prefix
sub linkify_mids {
- my ($self, $pfx, $str) = @_;
+ my ($self, $pfx, $str, $raw) = @_;
$$str =~ s!<([^>]+)>!
my $msgid = PublicInbox::Hval->new_msgid($1);
my $html = $msgid->as_html;
@@ -102,15 +102,17 @@ sub linkify_mids {
# salt this, as this could be exploited to show
# links in the HTML which don't show up in the raw mail.
my $key = sha1_hex($html . $SALT);
- $self->{$key} = [ $href, $html ];
- '<PI-LINK-'. $key . '>';
+ my $repl = qq(<<a\nhref="$pfx/$href/">$html</a>>);
+ $repl .= qq{ (<a\nhref="$pfx/$href/raw">raw</a>)} if $raw;
+ $self->{$key} = $repl;
+ 'PI-LINK-'. $key;
!ge;
$$str = ascii_html($$str);
$$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
my $key = $1;
my $repl = $_[0]->{$key};
if (defined $repl) {
- "<a\nhref=\"$pfx/$repl->[0]/\">$repl->[1]</a>";
+ $repl;
} else {
# false positive or somebody tried to mess with us
$key;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index ff55596d..00bf38a9 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -691,14 +691,11 @@ sub _msg_html_prepare {
$rv .= "Message-ID: <$mhtml> ";
$rv .= "(<a\nhref=\"raw\">raw</a>)\n";
} else {
- foreach (@$mids) {
- my $mid = PublicInbox::Hval->new_msgid($_);
- my $mhtml = $mid->as_html;
- my $href = $mid->{href};
- $rv .= "Message-ID: ";
- $rv .= "<<a\nhref=\"../$href/\">$mhtml</a>> ";
- $rv .= "(<a\nhref=\"../$href/raw\">raw</a>)\n";
- }
+ my $lnk = PublicInbox::Linkify->new;
+ my $s = '';
+ $s .= "Message-ID: $_\n" for ($hdr->header_raw('Message-ID'));
+ $lnk->linkify_mids('..', \$s, 1);
+ $rv .= $s;
}
$rv .= _parent_headers($hdr, $over);
$rv .= "\n";
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 6/7] index: allow search/lookups on X-Alt-Message-ID
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
` (4 preceding siblings ...)
2019-10-24 0:12 ` [PATCH 5/7] linkify: support adding "(raw)" link for Message-IDs Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
2019-10-24 0:12 ` [RFC 7/7] view: show X-Alt-Message-ID in permalink view, too Eric Wong
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
Since we replace extra Message-ID headers with X-Alt-Message-ID
to placate NNTP clients, we should allow searching and indexing
on X-Alt-Message-ID just like we do with Message-ID.
---
lib/PublicInbox/MID.pm | 27 +++++++++++++++++++++------
lib/PublicInbox/OverIdx.pm | 4 ++--
lib/PublicInbox/SearchIdx.pm | 4 ++--
t/mid.t | 7 ++++++-
t/v2writable.t | 16 ++++++++++++++++
5 files changed, 47 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 14089f91..d7a42c38 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -7,7 +7,7 @@ use strict;
use warnings;
use base qw/Exporter/;
our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
- mids references/;
+ mids references mids_for_index/;
use URI::Escape qw(uri_escape_utf8);
use Digest::SHA qw/sha1_hex/;
require PublicInbox::Address;
@@ -54,11 +54,10 @@ sub mid2path {
# Only for v1 code paths:
sub mid_mime ($) { mids($_[0]->header_obj)->[0] }
-sub mids ($) {
- my ($hdr) = @_;
+# only intended for Message-ID and X-Alt-Message-ID
+sub extract_mids {
my @mids;
- my @v = $hdr->header_raw('Message-Id');
- foreach my $v (@v) {
+ for my $v (@_) {
my @cur = ($v =~ /<([^>]+)>/sg);
if (@cur) {
push(@mids, @cur);
@@ -66,7 +65,23 @@ sub mids ($) {
push(@mids, $v);
}
}
- uniq_mids(\@mids);
+ \@mids;
+}
+
+sub mids ($) {
+ my ($hdr) = @_;
+ my @mids = $hdr->header_raw('Message-Id');
+ uniq_mids(extract_mids(@mids));
+}
+
+# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
+# to placate some clients, and we want to ensure NNTP-only clients can
+# import and index without relying on HTTP endpoints
+sub mids_for_index ($) {
+ my ($hdr) = @_;
+ my @mids = $hdr->header_raw('Message-Id');
+ my @alts = $hdr->header_raw('X-Alt-Message-ID');
+ uniq_mids(extract_mids(@mids, @alts));
}
# last References should be IRT, but some mail clients do things
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 01ca6f11..189bd21d 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -13,7 +13,7 @@ use warnings;
use base qw(PublicInbox::Over);
use IO::Handle;
use DBI qw(:sql_types); # SQL_BLOB
-use PublicInbox::MID qw/id_compress mids references/;
+use PublicInbox::MID qw/id_compress mids_for_index references/;
use PublicInbox::SearchMsg qw(subject_normalized);
use Compress::Zlib qw(compress);
use PublicInbox::Search;
@@ -256,7 +256,7 @@ sub add_overview {
lines => $lines,
blob => $oid,
}, 'PublicInbox::SearchMsg';
- my $mids = mids($mime->header_obj);
+ my $mids = mids_for_index($mime->header_obj);
my $refs = parse_references($smsg, $mid0, $mids);
my $subj = $smsg->subject;
my $xpath;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index aed3875a..b2d71a1f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -12,7 +12,7 @@ use warnings;
use base qw(PublicInbox::Search PublicInbox::Lock);
use PublicInbox::MIME;
use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/;
use PublicInbox::MsgIter;
use Carp qw(croak);
use POSIX qw(strftime);
@@ -344,7 +344,7 @@ sub add_xapian ($$$$$) {
sub add_message {
# mime = Email::MIME object
my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
- my $mids = mids($mime->header_obj);
+ my $mids = mids_for_index($mime->header_obj);
$mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
unless (defined $num) { # v1
$self->_msgmap_init;
diff --git a/t/mid.t b/t/mid.t
index 9ad10a99..98b0c200 100644
--- a/t/mid.t
+++ b/t/mid.t
@@ -1,7 +1,7 @@
# Copyright (C) 2016-2019 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use Test::More;
-use PublicInbox::MID qw(mid_escape mids references);
+use PublicInbox::MID qw(mid_escape mids references mids_for_index);
is(mid_escape('foo!@(bar)'), 'foo!@(bar)');
is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
@@ -10,6 +10,7 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
{
use Email::MIME;
my $mime = Email::MIME->create;
+ $mime->header_set('X-Alt-Message-ID', '<alt-id-for-nntp>');
$mime->header_set('Message-Id', '<mid-1@a>');
is_deeply(['mid-1@a'], mids($mime->header_obj), 'mids in common case');
$mime->header_set('Message-Id', '<mid-1@a>', '<mid-2@b>');
@@ -40,6 +41,10 @@ is(mid_escape('foo%!@(bar)'), 'foo%25!@(bar)');
$mime->header_set('To', 'u@example.com');
$mime->header_set('References', '<hello> <world> <n> <u@example.com>');
is_deeply(references($mime->header_obj), [qw(hello world)]);
+
+ is_deeply([qw(helloworld alt-id-for-nntp)],
+ mids_for_index($mime->header_obj),
+ 'X-Alt-Message-ID can be indexed');
}
done_testing();
diff --git a/t/v2writable.t b/t/v2writable.t
index c2daac2f..2b825768 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -115,6 +115,7 @@ if ('ensure git configs are correct') {
{
$mime->header_set('Message-Id', '<abcde@1>', '<abcde@2>');
+ $mime->header_set('X-Alt-Message-Id', '<alt-id-for-nntp>');
$mime->header_set('References', '<zz-mid@b>');
ok($im->add($mime), 'message with multiple Message-ID');
$im->done;
@@ -127,6 +128,21 @@ if ('ensure git configs are correct') {
is($mset2->size, 1, 'message found by second MID');
is((($mset1->items)[0])->get_docid, (($mset2->items)[0])->get_docid,
'same document') if ($mset1->size);
+
+ my $alt = $srch->reopen->query('m:alt-id-for-nntp', { mset => 1 });
+ is($alt->size, 1, 'message found by alt MID (NNTP)');
+ is((($alt->items)[0])->get_docid, (($mset1->items)[0])->get_docid,
+ 'same document') if ($mset1->size);
+ $mime->header_set('X-Alt-Message-Id');
+
+ my %uniq;
+ for my $mid (qw(abcde@1 abcde@2 alt-id-for-nntp)) {
+ my $msgs = $ibx->over->get_thread($mid);
+ my $key = join(' ', sort(map { $_->{num} } @$msgs));
+ $uniq{$key}++;
+ }
+ is(scalar(keys(%uniq)), 1, 'all alt Message-ID queries give same smsg');
+ is_deeply([values(%uniq)], [3], '3 queries, 3 results');
}
{
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 7/7] view: show X-Alt-Message-ID in permalink view, too
2019-10-24 0:12 [PATCH 0/7] redundant header madness Eric Wong
` (5 preceding siblings ...)
2019-10-24 0:12 ` [RFC 6/7] index: allow search/lookups on X-Alt-Message-ID Eric Wong
@ 2019-10-24 0:12 ` Eric Wong
6 siblings, 0 replies; 8+ messages in thread
From: Eric Wong @ 2019-10-24 0:12 UTC (permalink / raw)
To: meta
Since we index X-Alt-Message-ID (because we need to placate some
NNTP clients), we now display it as well, since that Message-ID
could be the X-Alt-Message-ID that the reader is actually
interested in.
---
lib/PublicInbox/View.pm | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 00bf38a9..39b04174 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,7 @@ use bytes (); # only for bytes::length
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
use PublicInbox::Linkify;
-use PublicInbox::MID qw/id_compress mid_escape mids references/;
+use PublicInbox::MID qw/id_compress mid_escape mids mids_for_index references/;
use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream;
@@ -629,7 +629,7 @@ sub _msg_html_prepare {
my $over = $ctx->{-inbox}->over;
my $obfs_ibx = $ctx->{-obfs_ibx};
my $rv = '';
- my $mids = mids($hdr);
+ my $mids = mids_for_index($hdr);
if ($nr == 0) {
if ($more) {
$rv .=
@@ -691,9 +691,13 @@ sub _msg_html_prepare {
$rv .= "Message-ID: <$mhtml> ";
$rv .= "(<a\nhref=\"raw\">raw</a>)\n";
} else {
+ # X-Alt-Message-ID can happen if a message is injected from
+ # public-inbox-nntpd because of multiple Message-ID headers.
my $lnk = PublicInbox::Linkify->new;
my $s = '';
- $s .= "Message-ID: $_\n" for ($hdr->header_raw('Message-ID'));
+ for my $h (qw(Message-ID X-Alt-Message-ID)) {
+ $s .= "$h: $_\n" for ($hdr->header_raw($h));
+ }
$lnk->linkify_mids('..', \$s, 1);
$rv .= $s;
}
^ permalink raw reply related [flat|nested] 8+ messages in thread