* [PATCH 01/11] import: consolidate mid prepend logic, here
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 02/11] www: $MESSAGE_ID/raw endpoint supports "duplicates" Eric Wong (Contractor, The Linux Foundation)
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
This also quiets down warnings from -watch when spam training
happens on messages without Message-Id.
---
lib/PublicInbox/Import.pm | 31 ++++++++++++++++++++++++-------
lib/PublicInbox/V2Writable.pm | 3 +--
2 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 5d116a1..6824fac 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -203,7 +203,7 @@ sub remove {
my ($r, $w) = $self->gfi_start;
my $tip = $self->{tip};
if ($path_type eq '2/38') {
- $path = mid2path(mid_mime($mime));
+ $path = mid2path(v1_mid0($mime));
($err, $cur) = check_remove_v1($r, $w, $tip, $path, $mime);
return ($err, $cur) if $err;
} else {
@@ -296,6 +296,28 @@ sub drop_unwanted_headers ($) {
$mime->header_set($_) for @PublicInbox::MDA::BAD_HEADERS;
}
+# used by V2Writable, too
+sub prepend_mid ($$) {
+ my ($hdr, $mid0) = @_;
+ # @cur is likely empty if we need to call this sub, but it could
+ # have random unparseable crap which we'll preserve, too.
+ my @cur = $hdr->header_raw('Message-Id');
+ $hdr->header_set('Message-Id', "<$mid0>", @cur);
+}
+
+sub v1_mid0 ($) {
+ my ($mime) = @_;
+ my $hdr = $mime->header_obj;
+ my $mids = mids($hdr);
+
+ if (!scalar(@$mids)) { # spam often has no Message-Id
+ my $mid0 = digest2mid(content_digest($mime));
+ prepend_mid($hdr, $mid0);
+ return $mid0;
+ }
+ $mids->[0];
+}
+
# returns undef on duplicate
# returns the :MARK of the most recent commit
sub add {
@@ -313,12 +335,7 @@ sub add {
my $path;
if ($path_type eq '2/38') {
- my $mids = mids($mime->header_obj);
- if (!scalar(@$mids)) {
- my $dig = content_digest($mime);
- @$mids = (digest2mid($dig));
- }
- $path = mid2path($mids->[0]);
+ $path = mid2path(v1_mid0($mime));
} else { # v2 layout, one file:
$path = 'm';
}
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 20c2736..b04e6fb 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -173,8 +173,7 @@ sub num_for_harder {
$num = $self->{skel}->{mm}->mid_insert($$mid0);
}
}
- my @cur = $hdr->header_raw('Message-Id');
- $hdr->header_set('Message-Id', "<$$mid0>", @cur);
+ PublicInbox::Import::prepend_mid($hdr, $$mid0);
$num;
}
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 02/11] www: $MESSAGE_ID/raw endpoint supports "duplicates"
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 01/11] import: consolidate mid prepend logic, here Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 03/11] search: reopen DB if each_smsg_by_mid fails Eric Wong (Contractor, The Linux Foundation)
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
Since v2 supports duplicate messages, we need to support
looking up different messages with the same Message-Id.
Fortunately, our "raw" endpoint has always been mboxrd,
so users won't need to change their parsing tools.
---
MANIFEST | 1 +
lib/PublicInbox/Mbox.pm | 71 ++++++++++++++++++++++++++----
lib/PublicInbox/Search.pm | 1 +
lib/PublicInbox/WWW.pm | 3 +-
t/psgi_v2.t | 110 ++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 175 insertions(+), 11 deletions(-)
create mode 100644 t/psgi_v2.t
diff --git a/MANIFEST b/MANIFEST
index 0f88995..8b2b10b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -171,6 +171,7 @@ t/psgi_attach.t
t/psgi_mount.t
t/psgi_search.t
t/psgi_text.t
+t/psgi_v2.t
t/qspawn.t
t/reply.t
t/search-thr-index.t
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 84cc384..79e09a7 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -26,12 +26,68 @@ sub subject_fn ($) {
$fn eq '' ? 'no-subject' : $fn;
}
-sub emit1 {
- my ($ctx, $msg) = @_;
- $msg = Email::Simple->new($msg);
- my $fn = subject_fn($msg);
+sub smsg_for ($$$) {
+ my ($head, $db, $mid) = @_;
+ my $doc_id = $head->get_docid;
+ my $doc = $db->get_document($doc_id);
+ PublicInbox::SearchMsg->wrap($doc, $mid)->load_expand;
+}
+
+sub mb_stream {
+ my ($more) = @_;
+ bless $more, 'PublicInbox::Mbox';
+}
+
+# called by PSGI server as body response
+sub getline {
+ my ($more) = @_; # self
+ my ($ctx, $head, $tail, $db, $cur) = @$more;
+ if ($cur) {
+ pop @$more;
+ return msg_str($ctx, $cur);
+ }
+ for (; !defined($cur) && $head != $tail; $head++) {
+ my $smsg = smsg_for($head, $db, $ctx->{mid});
+ next if $smsg->type ne 'mail';
+ my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
+ $cur = Email::Simple->new($mref);
+ $cur = msg_str($ctx, $cur);
+ }
+ $more->[1] = $head;
+ $cur;
+}
+
+sub close {} # noop
+
+sub emit_raw {
+ my ($ctx) = @_;
+ my $mid = $ctx->{mid};
+ my $ibx = $ctx->{-inbox};
+ my $first;
+ my $more;
+ my ($head, $tail, $db);
+ my %seen;
+ if (my $srch = $ibx->search) {
+ $srch->retry_reopen(sub {
+ ($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
+ for (; !defined($first) && $head != $tail; $head++) {
+ my $smsg = smsg_for($head, $db, $mid);
+ next if $smsg->type ne 'mail';
+ my $mref = $ibx->msg_by_smsg($smsg) or next;
+ $first = Email::Simple->new($mref);
+ }
+ if ($head != $tail) {
+ $more = [ $ctx, $head, $tail, $db, $first ];
+ }
+ });
+ } else {
+ my $mref = $ibx->msg_by_mid($mid) or return;
+ $first = Email::Simple->new($mref);
+ }
+ return unless defined $first;
+ my $fn = subject_fn($first);
my @hdr = ('Content-Type');
- if ($ctx->{-inbox}->{obfuscate}) {
+ if ($ibx->{obfuscate}) {
# obfuscation is stupid, but maybe scrapers are, too...
push @hdr, 'application/mbox';
$fn .= '.mbox';
@@ -40,10 +96,7 @@ sub emit1 {
$fn .= '.txt';
}
push @hdr, 'Content-Disposition', "inline; filename=$fn";
-
- # single message should be easily renderable in browsers,
- # unless obfuscation is enabled :<
- [ 200, \@hdr, [ msg_str($ctx, $msg) ] ]
+ [ 200, \@hdr, $more ? mb_stream($more) : [ msg_str($ctx, $first) ] ];
}
sub msg_str {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index f08b987..24600ee 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -417,6 +417,7 @@ sub each_smsg_by_mid {
my $term = 'Q' . $mid;
my $head = $db->postlist_begin($term);
my $tail = $db->postlist_end($term);
+ return ($head, $tail, $db) if wantarray;
for (; $head->nequal($tail); $head->inc) {
my $doc_id = $head->get_docid;
my $doc = $db->get_document($doc_id);
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e95fba0..f86363c 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -218,9 +218,8 @@ sub mid2blob {
# /$INBOX/$MESSAGE_ID/raw -> raw mbox
sub get_mid_txt {
my ($ctx) = @_;
- my $x = mid2blob($ctx) or return r404($ctx);
require PublicInbox::Mbox;
- PublicInbox::Mbox::emit1($ctx, $x);
+ PublicInbox::Mbox::emit_raw($ctx) || r404($ctx);
}
# /$INBOX/$MESSAGE_ID/ -> HTML content (short quotes)
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
new file mode 100644
index 0000000..5d089db
--- /dev/null
+++ b/t/psgi_v2.t
@@ -0,0 +1,110 @@
+# Copyright (C) 2018 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use File::Temp qw/tempdir/;
+use PublicInbox::MIME;
+use PublicInbox::Config;
+use PublicInbox::WWW;
+my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
+ URI::Escape Plack::Builder);
+foreach my $mod (@mods) {
+ eval "require $mod";
+ plan skip_all => "$mod missing for psgi_v2_dupes.t" if $@;
+}
+use_ok($_) for @mods;
+use_ok 'PublicInbox::V2Writable';
+my $mainrepo = tempdir('pi-v2_dupes-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $ibx = {
+ mainrepo => $mainrepo,
+ name => 'test-v2writable',
+ version => 2,
+ -primary_address => 'test@example.com',
+};
+$ibx = PublicInbox::Inbox->new($ibx);
+my $new_mid;
+
+my $im = PublicInbox::V2Writable->new($ibx, 1);
+$im->{parallel} = 0;
+
+my $mime = PublicInbox::MIME->create(
+ header => [
+ From => 'a@example.com',
+ To => 'test@example.com',
+ Subject => 'this is a subject',
+ 'Message-ID' => '<a-mid@b>',
+ Date => 'Fri, 02 Oct 1993 00:00:00 +0000',
+ ],
+ body => "hello world\n",
+);
+ok($im->add($mime), 'added one message');
+$mime->body_set("hello world!\n");
+
+my @warn;
+local $SIG{__WARN__} = sub { push @warn, @_ };
+ok($im->add($mime), 'added duplicate-but-different message');
+is(scalar(@warn), 1, 'got one warning');
+my @mids = $mime->header_obj->header_raw('Message-Id');
+$new_mid = PublicInbox::MID::mid_clean($mids[0]);
+$im->done;
+
+my $cfgpfx = "publicinbox.v2test";
+my %cfg = (
+ "$cfgpfx.address" => $ibx->{-primary_address},
+ "$cfgpfx.mainrepo" => $mainrepo,
+);
+
+my $config = PublicInbox::Config->new({ %cfg });
+my $www = PublicInbox::WWW->new($config);
+my ($res, $raw, @from_);
+test_psgi(sub { $www->call(@_) }, sub {
+ my ($cb) = @_;
+ $res = $cb->(GET('/v2test/a-mid@b/raw'));
+ $raw = $res->content;
+ like($raw, qr/^hello world$/m, 'got first message');
+ like($raw, qr/^hello world!$/m, 'got second message');
+ @from_ = ($raw =~ m/^From /mg);
+ is(scalar(@from_), 2, 'two From_ lines');
+
+ $res = $cb->(GET("/v2test/$new_mid/raw"));
+ $raw = $res->content;
+ like($raw, qr/^hello world!$/m, 'second message with new Message-Id');
+ @from_ = ($raw =~ m/^From /mg);
+ is(scalar(@from_), 1, 'only one From_ line');
+});
+
+$mime->header_set('Message-Id', 'a-mid@b');
+$mime->body_set("hello ghosts\n");
+ok($im->add($mime), 'added 3rd duplicate-but-different message');
+is(scalar(@warn), 2, 'got another warning');
+like($warn[0], qr/mismatched/, 'warned about mismatched messages');
+is($warn[0], $warn[1], 'both warnings are the same');
+
+@mids = $mime->header_obj->header_raw('Message-Id');
+my $third = PublicInbox::MID::mid_clean($mids[0]);
+$im->done;
+
+# need to reload...
+$config = PublicInbox::Config->new({ %cfg });
+$www = PublicInbox::WWW->new($config);
+test_psgi(sub { $www->call(@_) }, sub {
+ my ($cb) = @_;
+ $res = $cb->(GET("/v2test/$third/raw"));
+ $raw = $res->content;
+ like($raw, qr/^hello ghosts$/m, 'got third message');
+ @from_ = ($raw =~ m/^From /mg);
+ is(scalar(@from_), 1, 'one From_ line');
+
+ $res = $cb->(GET('/v2test/a-mid@b/raw'));
+ $raw = $res->content;
+ like($raw, qr/^hello world$/m, 'got first message');
+ like($raw, qr/^hello world!$/m, 'got second message');
+ like($raw, qr/^hello ghosts$/m, 'got third message');
+ @from_ = ($raw =~ m/^From /mg);
+ is(scalar(@from_), 3, 'three From_ lines');
+});
+
+done_testing();
+
+1;
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 03/11] search: reopen DB if each_smsg_by_mid fails
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 01/11] import: consolidate mid prepend logic, here Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 02/11] www: $MESSAGE_ID/raw endpoint supports "duplicates" Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 04/11] t/psgi_v2: minimal test for Atom feed and t.mbox.gz Eric Wong (Contractor, The Linux Foundation)
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
This gives more up-to-date data in case the first lookup finds
nothing, and allows us to avoid reopening in more places ourselves.
---
lib/PublicInbox/Search.pm | 5 +++++
t/psgi_v2.t | 10 +++-------
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 24600ee..a4e2498 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -417,6 +417,11 @@ sub each_smsg_by_mid {
my $term = 'Q' . $mid;
my $head = $db->postlist_begin($term);
my $tail = $db->postlist_end($term);
+ if ($head == $tail) {
+ $db->reopen;
+ $head = $db->postlist_begin($term);
+ $tail = $db->postlist_end($term);
+ }
return ($head, $tail, $db) if wantarray;
for (; $head->nequal($tail); $head->inc) {
my $doc_id = $head->get_docid;
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 5d089db..6a2ea5b 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -50,12 +50,11 @@ $new_mid = PublicInbox::MID::mid_clean($mids[0]);
$im->done;
my $cfgpfx = "publicinbox.v2test";
-my %cfg = (
+my $cfg = {
"$cfgpfx.address" => $ibx->{-primary_address},
"$cfgpfx.mainrepo" => $mainrepo,
-);
-
-my $config = PublicInbox::Config->new({ %cfg });
+};
+my $config = PublicInbox::Config->new($cfg);
my $www = PublicInbox::WWW->new($config);
my ($res, $raw, @from_);
test_psgi(sub { $www->call(@_) }, sub {
@@ -85,9 +84,6 @@ is($warn[0], $warn[1], 'both warnings are the same');
my $third = PublicInbox::MID::mid_clean($mids[0]);
$im->done;
-# need to reload...
-$config = PublicInbox::Config->new({ %cfg });
-$www = PublicInbox::WWW->new($config);
test_psgi(sub { $www->call(@_) }, sub {
my ($cb) = @_;
$res = $cb->(GET("/v2test/$third/raw"));
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 04/11] t/psgi_v2: minimal test for Atom feed and t.mbox.gz
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (2 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 03/11] search: reopen DB if each_smsg_by_mid fails Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 05/11] feed: fix new.html for v2 Eric Wong (Contractor, The Linux Foundation)
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
Some test coverage is better than none, here.
---
t/psgi_v2.t | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 6a2ea5b..7389798 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -43,6 +43,7 @@ $mime->body_set("hello world!\n");
my @warn;
local $SIG{__WARN__} = sub { push @warn, @_ };
+$mime->header_set(Date => 'Fri, 02 Oct 1993 00:01:00 +0000');
ok($im->add($mime), 'added duplicate-but-different message');
is(scalar(@warn), 1, 'got one warning');
my @mids = $mime->header_obj->header_raw('Message-Id');
@@ -71,6 +72,12 @@ test_psgi(sub { $www->call(@_) }, sub {
like($raw, qr/^hello world!$/m, 'second message with new Message-Id');
@from_ = ($raw =~ m/^From /mg);
is(scalar(@from_), 1, 'only one From_ line');
+
+ # Atom feed should sort by Date: (if Received is missing)
+ $res = $cb->(GET('/v2test/new.atom'));
+ my @bodies = ($res->content =~ />(hello [^<]+)</mg);
+ is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ],
+ 'Atom ordering is chronological');
});
$mime->header_set('Message-Id', 'a-mid@b');
@@ -99,6 +106,21 @@ test_psgi(sub { $www->call(@_) }, sub {
like($raw, qr/^hello ghosts$/m, 'got third message');
@from_ = ($raw =~ m/^From /mg);
is(scalar(@from_), 3, 'three From_ lines');
+
+ SKIP: {
+ eval { require IO::Uncompress::Gunzip };
+ skip 'IO::Uncompress::Gunzip missing', 4 if $@;
+
+ $res = $cb->(GET('/v2test/a-mid@b/t.mbox.gz'));
+ my $out;
+ my $in = $res->content;
+ my $status = IO::Uncompress::Gunzip::gunzip(\$in => \$out);
+ like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
+ like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
+ like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
+ @from_ = ($raw =~ m/^From /mg);
+ is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
+ };
});
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 05/11] feed: fix new.html for v2
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (3 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 04/11] t/psgi_v2: minimal test for Atom feed and t.mbox.gz Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 06/11] view: permalink (per-message) view shows multiple messages Eric Wong (Contractor, The Linux Foundation)
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
I forget this endpoint is still accessible (even if not linked).
This also simplifies new.html all around and removes some unused
clutter from the old days while we're at it.
---
lib/PublicInbox/Feed.pm | 82 ++++++++++++++++++++-----------------------------
t/psgi_v2.t | 6 ++++
2 files changed, 39 insertions(+), 49 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 3277b09..74d0bbd 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -12,13 +12,12 @@ use PublicInbox::WwwAtomStream;
# main function
sub generate {
my ($ctx) = @_;
- my @oids;
- each_recent_blob($ctx, sub { push @oids, $_[0] });
- return _no_thread() unless @oids;
+ my $oids = recent_blobs($ctx);
+ return _no_thread() unless @$oids;
my $git = $ctx->{-inbox}->git;
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
- while (my $oid = shift @oids) {
+ while (my $oid = shift @$oids) {
my $msg = $git->cat_file($oid) or next;
return PublicInbox::MIME->new($msg);
}
@@ -63,14 +62,8 @@ sub generate_html_index {
sub new_html {
my ($ctx) = @_;
- die "BUG: new_html is not used with search" if $ctx->{srch};
- my @oids;
- my (undef, $last) = each_recent_blob($ctx, sub {
- my ($oid, $commit, $ts, $u, $subj) = @_;
- $ctx->{first} ||= $commit;
- push @oids, $oid;
- });
- if (!@oids) {
+ my $oids = recent_blobs($ctx);
+ if (!@$oids) {
return [404, ['Content-Type', 'text/plain'],
["No messages, yet\n"] ];
}
@@ -79,13 +72,13 @@ sub new_html {
$ctx->{-hr} = 1;
my $git = $ctx->{-inbox}->git;
PublicInbox::WwwStream->response($ctx, 200, sub {
- while (my $oid = shift @oids) {
+ while (my $oid = shift @$oids) {
my $msg = $git->cat_file($oid) or next;
my $m = PublicInbox::MIME->new($msg);
- my $more = scalar @oids;
+ my $more = scalar @$oids;
return PublicInbox::View::index_entry($m, $ctx, $more);
}
- new_html_footer($ctx, $last);
+ new_html_footer($ctx);
});
}
@@ -96,45 +89,43 @@ sub _no_thread () {
}
sub new_html_footer {
- my ($ctx, $last) = @_;
+ my ($ctx) = @_;
my $qp = delete $ctx->{qp} or return;
- my $old_r = $qp->{r};
my $latest = '';
- my $next = ' ';
-
- if ($last) {
- $next = qq!<a\nhref="?r=$last"\nrel=next>next</a>!;
+ my $next = delete $ctx->{next_page} || '';
+ if ($next) {
+ $next = qq!<a\nhref="?$next"\nrel=next>next</a>!;
}
- if ($old_r) {
+ if (!$qp) {
$latest = qq! <a\nhref='./new.html'>latest</a>!;
+ $next ||= ' ';
}
"<hr><pre>page: $next$latest</pre>";
}
-sub each_recent_blob {
- my ($ctx, $cb) = @_;
+sub recent_blobs {
+ my ($ctx) = @_;
my $ibx = $ctx->{-inbox};
my $max = $ibx->{feedmax};
+ my $qp = $ctx->{qp};
my $v = $ibx->{version} || 1;
- if ($v == 2) {
- wantarray and die "each_recent_blob return ignored for v2";
- } elsif ($v != 1) {
+ if ($v > 2) {
die "BUG: unsupported inbox version: $v\n";
}
if (my $srch = $ibx->search) {
- my $res = $srch->query('', { limit => $max });
- foreach my $smsg (@{$res->{msgs}}) {
- # search-enabled callers do not need author/date/subject
- $cb->($smsg->{blob});
- }
- return;
+ my $o = $qp ? $qp->{o} : 0;
+ $o += 0;
+ $o = 0 if $o < 0;
+ my $res = $srch->query('', { limit => $max, offset => $o });
+ my $next = $o + $max;
+ $ctx->{next_page} = "o=$next" if $res->{total} >= $next;
+ return [ map { $_->{blob} } @{$res->{msgs}} ];
}
my $hex = '[a-f0-9]';
my $addmsg = qr!^:000000 100644 \S+ (\S+) A\t${hex}{2}/${hex}{38}$!;
my $delmsg = qr!^:100644 000000 (\S+) \S+ D\t(${hex}{2}/${hex}{38})$!;
my $refhex = qr/(?:HEAD|${hex}{4,40})(?:~\d+)?/;
- my $qp = $ctx->{qp};
# revision ranges may be specified
my $range = 'HEAD';
@@ -149,45 +140,38 @@ sub each_recent_blob {
my $log = $ibx->git->popen(qw/log
--no-notes --no-color --raw -r
--no-abbrev --abbrev-commit/,
- "--format=%h%x00%ct%x00%an%x00%s%x00",
- $range);
+ "--format=%h", $range);
my %deleted; # only an optimization at this point
my $last;
- my $nr = 0;
- my ($cur_commit, $first_commit, $last_commit);
- my ($ts, $subj, $u);
+ my $last_commit;
local $/ = "\n";
+ my @oids;
while (defined(my $line = <$log>)) {
if ($line =~ /$addmsg/o) {
my $add = $1;
next if $deleted{$add}; # optimization-only
- $cb->($add, $cur_commit, $ts, $u, $subj) and $nr++;
- if ($nr >= $max) {
+ push @oids, $add;
+ if (scalar(@oids) >= $max) {
$last = 1;
last;
}
} elsif ($line =~ /$delmsg/o) {
$deleted{$1} = 1;
- } elsif ($line =~ /^${hex}{7,40}/o) {
- ($cur_commit, $ts, $u, $subj) = split("\0", $line);
- unless (defined $first_commit) {
- $first_commit = $cur_commit;
- }
}
}
if ($last) {
local $/ = "\n";
while (my $line = <$log>) {
- if ($line =~ /^(${hex}{7,40})/o) {
+ if ($line =~ /^(${hex}{7,40})/) {
$last_commit = $1;
last;
}
}
}
- # for pagination
- ($first_commit, $last_commit);
+ $ctx->{next_page} = "r=$last_commit" if $last_commit;
+ \@oids;
}
1;
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 7389798..1e45c26 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -78,6 +78,12 @@ test_psgi(sub { $www->call(@_) }, sub {
my @bodies = ($res->content =~ />(hello [^<]+)</mg);
is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ],
'Atom ordering is chronological');
+
+ # new.html should sort by Date:, too (if Received is missing)
+ $res = $cb->(GET('/v2test/new.html'));
+ @bodies = ($res->content =~ /^(hello [^<]+)$/mg);
+ is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ],
+ 'new.html ordering is chronological');
});
$mime->header_set('Message-Id', 'a-mid@b');
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 06/11] view: permalink (per-message) view shows multiple messages
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (4 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 05/11] feed: fix new.html for v2 Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 07/11] searchidx: warn about vivifying multiple ghosts Eric Wong (Contractor, The Linux Foundation)
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
This needs tests and further refinement, but current tests pass.
---
lib/PublicInbox/Mbox.pm | 12 ++---
lib/PublicInbox/SearchMsg.pm | 7 +++
lib/PublicInbox/View.pm | 107 ++++++++++++++++++++++++++++++++++++++-----
lib/PublicInbox/WWW.pm | 7 +--
t/psgi_v2.t | 12 +++++
5 files changed, 118 insertions(+), 27 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 79e09a7..c14037f 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -26,13 +26,6 @@ sub subject_fn ($) {
$fn eq '' ? 'no-subject' : $fn;
}
-sub smsg_for ($$$) {
- my ($head, $db, $mid) = @_;
- my $doc_id = $head->get_docid;
- my $doc = $db->get_document($doc_id);
- PublicInbox::SearchMsg->wrap($doc, $mid)->load_expand;
-}
-
sub mb_stream {
my ($more) = @_;
bless $more, 'PublicInbox::Mbox';
@@ -47,7 +40,7 @@ sub getline {
return msg_str($ctx, $cur);
}
for (; !defined($cur) && $head != $tail; $head++) {
- my $smsg = smsg_for($head, $db, $ctx->{mid});
+ my $smsg = PublicInbox::SearchMsg->get($head, $db, $ctx->{mid});
next if $smsg->type ne 'mail';
my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
$cur = Email::Simple->new($mref);
@@ -71,7 +64,8 @@ sub emit_raw {
$srch->retry_reopen(sub {
($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
for (; !defined($first) && $head != $tail; $head++) {
- my $smsg = smsg_for($head, $db, $mid);
+ my @args = ($head, $db, $mid);
+ my $smsg = PublicInbox::SearchMsg->get(@args);
next if $smsg->type ne 'mail';
my $mref = $ibx->msg_by_smsg($smsg) or next;
$first = Email::Simple->new($mref);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index dd3d58d..b944868 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -24,6 +24,13 @@ sub wrap {
bless { doc => $doc, mime => undef, mid => $mid }, $class;
}
+sub get {
+ my ($class, $head, $db, $mid) = @_;
+ my $doc_id = $head->get_docid;
+ my $doc = $db->get_document($doc_id);
+ load_expand(wrap($class, $doc, $mid))
+}
+
sub get_val ($$) {
my ($doc, $col) = @_;
Search::Xapian::sortable_unserialise($doc->get_value($col));
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 18882af..34ab3e5 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -9,7 +9,7 @@ use warnings;
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/;
use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream;
@@ -21,18 +21,23 @@ use constant TCHILD => '` ';
sub th_pfx ($) { $_[0] == 0 ? '' : TCHILD };
# public functions: (unstable)
+
sub msg_html {
- my ($ctx, $mime) = @_;
+ my ($ctx, $mime, $more) = @_;
my $hdr = $mime->header_obj;
my $ibx = $ctx->{-inbox};
- my $obfs_ibx = $ibx->{obfuscate} ? $ibx : undef;
- my $tip = _msg_html_prepare($hdr, $ctx, $obfs_ibx);
+ my $obfs_ibx = $ctx->{-obfs_ibx} = $ibx->{obfuscate} ? $ibx : undef;
+ my $tip = _msg_html_prepare($hdr, $ctx, $more, 0);
+ my $end = 2;
PublicInbox::WwwStream->response($ctx, 200, sub {
my ($nr, undef) = @_;
if ($nr == 1) {
$tip . multipart_text_as_html($mime, '', $obfs_ibx) .
'</pre><hr>'
- } elsif ($nr == 2) {
+ } elsif ($more && @$more) {
+ ++$end;
+ msg_html_more($ctx, $more, $nr);
+ } elsif ($nr == $end) {
# fake an EOF if generating the footer fails;
# we want to at least show the message if something
# here crashes:
@@ -46,6 +51,63 @@ sub msg_html {
});
}
+sub msg_page {
+ my ($ctx) = @_;
+ my $mid = $ctx->{mid};
+ my $ibx = $ctx->{-inbox};
+ my ($first, $more, $head, $tail, $db);
+ if (my $srch = $ibx->search) {
+ $srch->retry_reopen(sub {
+ ($head, $tail, $db) = $srch->each_smsg_by_mid($mid);
+ for (; !defined($first) && $head != $tail; $head++) {
+ my @args = ($head, $db, $mid);
+ my $smsg = PublicInbox::SearchMsg->get(@args);
+ next if $smsg->type ne 'mail';
+ $first = $ibx->msg_by_smsg($smsg);
+ }
+ if ($head != $tail) {
+ $more = [ $head, $tail, $db ];
+ }
+ });
+ } else {
+ $first = $ibx->msg_by_mid($mid) or return;
+ }
+ $first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef;
+}
+
+sub msg_html_more {
+ my ($ctx, $more, $nr) = @_;
+ my $str = eval {
+ my $mref;
+ my ($head, $tail, $db) = @$more;
+ for (; !defined($mref) && $head != $tail; $head++) {
+ my $smsg = PublicInbox::SearchMsg->get($head, $db,
+ $ctx->{mid});
+ next if $smsg->type ne 'mail';
+ $mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+ }
+ if ($head == $tail) { # done
+ @$more = ();
+ } else {
+ $more->[0] = $head;
+ }
+ if ($mref) {
+ my $mime = PublicInbox::MIME->new($mref);
+ _msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
+ multipart_text_as_html($mime, '',
+ $ctx->{-obfs_ibx}) .
+ '</pre><hr>'
+ } else {
+ '';
+ }
+ };
+ if ($@) {
+ warn "Error lookup up additional messages: $@\n";
+ $str = '<pre>Error looking up additional messages</pre>';
+ }
+ $str;
+}
+
# /$INBOX/$MESSAGE_ID/#R
sub msg_reply {
my ($ctx, $hdr) = @_;
@@ -529,17 +591,26 @@ sub add_text_body {
}
sub _msg_html_prepare {
- my ($hdr, $ctx, $obfs_ibx) = @_;
+ my ($hdr, $ctx, $more, $nr) = @_;
my $srch = $ctx->{srch} if $ctx;
my $atom = '';
- my $rv = "<pre\nid=b>"; # anchor for body start
-
+ my $obfs_ibx = $ctx->{-obfs_ibx};
+ my $rv = '';
+ my $mids = mids($hdr);
+ my $multiple = scalar(@$mids) > 1; # zero, one, infinity
+ if ($nr == 0) {
+ if ($more) {
+ $rv .=
+"<pre>WARNING: multiple messages refer to this Message-ID\n</pre>";
+ }
+ $rv .= "<pre\nid=b>"; # anchor for body start
+ } else {
+ $rv .= '<pre>';
+ }
if ($srch) {
$ctx->{-upfx} = '../';
}
my @title;
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
- $mid = PublicInbox::Hval->new_msgid($mid);
foreach my $h (qw(From To Cc Subject Date)) {
my $v = $hdr->header($h);
defined($v) && ($v ne '') or next;
@@ -564,8 +635,20 @@ sub _msg_html_prepare {
}
$title[0] ||= '(no subject)';
$ctx->{-title_html} = join(' - ', @title);
- $rv .= 'Message-ID: <' . $mid->as_html . '> ';
- $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+ foreach (@$mids) {
+ my $mid = PublicInbox::Hval->new_msgid($_) ;
+ my $mhtml = $mid->as_html;
+ if ($multiple) {
+ my $href = $mid->{href};
+ $rv .= "Message-ID: ";
+ $rv .= "<a\nhref=\"../$href/\">";
+ $rv .= "<$mhtml></a> ";
+ $rv .= "(<a\nhref=\"../$href/raw\">raw</a>)\n";
+ } else {
+ $rv .= "Message-ID: <$mhtml> ";
+ $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+ }
+ }
$rv .= _parent_headers($hdr, $srch);
$rv .= "\n";
}
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f86363c..7bf866f 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,13 +225,8 @@ sub get_mid_txt {
# /$INBOX/$MESSAGE_ID/ -> HTML content (short quotes)
sub get_mid_html {
my ($ctx) = @_;
- my $x = mid2blob($ctx) or return r404($ctx);
-
require PublicInbox::View;
- require PublicInbox::MIME;
- my $mime = PublicInbox::MIME->new($x);
- searcher($ctx);
- PublicInbox::View::msg_html($ctx, $mime);
+ PublicInbox::View::msg_page($ctx) || r404($ctx);
}
# /$INBOX/$MESSAGE_ID/t/
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 1e45c26..eaa3218 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -127,6 +127,18 @@ test_psgi(sub { $www->call(@_) }, sub {
@from_ = ($raw =~ m/^From /mg);
is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
};
+
+ local $SIG{__WARN__} = 'DEFAULT';
+ $res = $cb->(GET('/v2test/a-mid@b/'));
+ $raw = $res->content;
+ like($raw, qr/^hello world$/m, 'got first message');
+ like($raw, qr/^hello world!$/m, 'got second message');
+ like($raw, qr/^hello ghosts$/m, 'got third message');
+ @from_ = ($raw =~ m/>From: /mg);
+ is(scalar(@from_), 3, 'three From: lines');
+ foreach my $mid ('a-mid@b', $new_mid, $third) {
+ like($raw, qr/<\Q$mid\E>/s, "Message-ID $mid shown");
+ }
});
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 07/11] searchidx: warn about vivifying multiple ghosts
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (5 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 06/11] view: permalink (per-message) view shows multiple messages Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 08/11] v2writable: warn on unseen deleted files Eric Wong (Contractor, The Linux Foundation)
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
This should help us detect bugs sooner in case we have
space waste problems.
---
lib/PublicInbox/SearchIdx.pm | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 7ac16ec..446cfb0 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -561,6 +561,10 @@ sub link_and_save {
1;
});
}
+ if ($vivified > 1) {
+ my $id = '<'.join('> <', @$mids).'>';
+ warn "BUG: vivified multiple ($vivified) ghosts for $id\n";
+ }
# not really important, but we return any vivified ghost docid, here:
return $doc_id if defined $doc_id;
link_doc($self, $doc, $refs, $old_tid);
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 08/11] v2writable: warn on unseen deleted files
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (6 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 07/11] searchidx: warn about vivifying multiple ghosts Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 09/11] www: get rid of unnecessary 'inbox' name reference Eric Wong (Contractor, The Linux Foundation)
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
It would be a bug to have deleted files marked but not
seen in our histories.
---
lib/PublicInbox/V2Writable.pm | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index b04e6fb..01ec98a 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -495,7 +495,7 @@ sub mark_deleted {
my $mids = mids($mime->header_obj);
my $cid = content_id($mime);
foreach my $mid (@$mids) {
- $D->{$mid.$cid} = 1;
+ $D->{"$mid\0$cid"} = 1;
}
}
@@ -513,7 +513,7 @@ sub reindex_oid {
my $num = -1;
my $del = 0;
foreach my $mid (@$mids) {
- $del += (delete $D->{$mid.$cid} || 0);
+ $del += (delete $D->{"$mid\0$cid"} || 0);
my $n = $mm_tmp->num_for($mid);
if (defined $n && $n > $num) {
$mid0 = $mid;
@@ -633,6 +633,14 @@ sub reindex {
}
my ($min, $max) = $mm_tmp->minmax;
defined $max and die "leftover article numbers at $min..$max\n";
+ my @d = sort keys %$D;
+ if (@d) {
+ warn "BUG: ", scalar(@d)," unseen deleted messages marked\n";
+ foreach (@d) {
+ my ($mid, undef) = split(/\0/, $_, 2);
+ warn "<$mid>\n";
+ }
+ }
}
1;
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 09/11] www: get rid of unnecessary 'inbox' name reference
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (7 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 08/11] v2writable: warn on unseen deleted files Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 10/11] searchview: remove unnecessary imports from MID module Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 11/11] view: depend on SearchMsg for Message-ID Eric Wong (Contractor, The Linux Foundation)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
We use the actual Inbox object everywhere else and don't
need the name of the inbox separated from the object.
---
lib/PublicInbox/WWW.pm | 1 -
1 file changed, 1 deletion(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 7bf866f..f5ed271 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -152,7 +152,6 @@ sub invalid_inbox ($$) {
if (defined $obj) {
$ctx->{git} = $obj->git;
$ctx->{-inbox} = $obj;
- $ctx->{inbox} = $inbox;
return;
}
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 10/11] searchview: remove unnecessary imports from MID module
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (8 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 09/11] www: get rid of unnecessary 'inbox' name reference Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
2018-03-27 11:11 ` [PATCH 11/11] view: depend on SearchMsg for Message-ID Eric Wong (Contractor, The Linux Foundation)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
We no longer need many of these imports.
---
lib/PublicInbox/SearchView.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 55c588c..6e537b4 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -10,7 +10,7 @@ use PublicInbox::SearchMsg;
use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
use PublicInbox::View;
use PublicInbox::WwwAtomStream;
-use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape MID_ESC);
+use PublicInbox::MID qw(MID_ESC);
use PublicInbox::MIME;
require PublicInbox::Git;
require PublicInbox::SearchThread;
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 11/11] view: depend on SearchMsg for Message-ID
2018-03-27 11:11 [PATCH 00/11] duplicate support in UI + tests Eric Wong (Contractor, The Linux Foundation)
` (9 preceding siblings ...)
2018-03-27 11:11 ` [PATCH 10/11] searchview: remove unnecessary imports from MID module Eric Wong (Contractor, The Linux Foundation)
@ 2018-03-27 11:11 ` Eric Wong (Contractor, The Linux Foundation)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-03-27 11:11 UTC (permalink / raw)
To: meta
Since we need to handle messages with multiple and duplicate
Message-ID headers, our thread skeleton display must account
for that.
Since we have a "preferred" Message-ID in case of conflicts,
use it as the UUID in an Atom feed so readers do not get
confused by conflicts.
---
lib/PublicInbox/Feed.pm | 36 ++++++++--------
lib/PublicInbox/Inbox.pm | 9 ++++
lib/PublicInbox/SearchMsg.pm | 6 +--
lib/PublicInbox/SearchView.pm | 14 +++---
lib/PublicInbox/View.pm | 93 ++++++++++++++++++----------------------
lib/PublicInbox/WWW.pm | 1 +
lib/PublicInbox/WwwAtomStream.pm | 9 ++--
t/psgi_v2.t | 26 +++++++++++
8 files changed, 109 insertions(+), 85 deletions(-)
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 74d0bbd..f2285a6 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -8,18 +8,18 @@ use warnings;
use PublicInbox::MIME;
use PublicInbox::View;
use PublicInbox::WwwAtomStream;
+use PublicInbox::SearchMsg; # this loads w/o Search::Xapian
# main function
sub generate {
my ($ctx) = @_;
- my $oids = recent_blobs($ctx);
- return _no_thread() unless @$oids;
+ my $msgs = recent_msgs($ctx);
+ return _no_thread() unless @$msgs;
- my $git = $ctx->{-inbox}->git;
+ my $ibx = $ctx->{-inbox};
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
- while (my $oid = shift @$oids) {
- my $msg = $git->cat_file($oid) or next;
- return PublicInbox::MIME->new($msg);
+ while (my $smsg = shift @$msgs) {
+ $ibx->smsg_mime($smsg) and return $smsg;
}
});
}
@@ -36,9 +36,8 @@ sub generate_thread_atom {
$ctx->{-html_url} = $html_url;
my $msgs = $res->{msgs};
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
- while (my $msg = shift @$msgs) {
- $msg = $ibx->msg_by_smsg($msg) and
- return PublicInbox::MIME->new($msg);
+ while (my $smsg = shift @$msgs) {
+ $ibx->smsg_mime($smsg) and return $smsg;
}
});
}
@@ -62,20 +61,19 @@ sub generate_html_index {
sub new_html {
my ($ctx) = @_;
- my $oids = recent_blobs($ctx);
- if (!@$oids) {
+ my $msgs = recent_msgs($ctx);
+ if (!@$msgs) {
return [404, ['Content-Type', 'text/plain'],
["No messages, yet\n"] ];
}
$ctx->{-html_tip} = '<pre>';
$ctx->{-upfx} = '';
$ctx->{-hr} = 1;
- my $git = $ctx->{-inbox}->git;
+ my $ibx = $ctx->{-inbox};
PublicInbox::WwwStream->response($ctx, 200, sub {
- while (my $oid = shift @$oids) {
- my $msg = $git->cat_file($oid) or next;
- my $m = PublicInbox::MIME->new($msg);
- my $more = scalar @$oids;
+ while (my $smsg = shift @$msgs) {
+ my $m = $ibx->smsg_mime($smsg) or next;
+ my $more = scalar @$msgs;
return PublicInbox::View::index_entry($m, $ctx, $more);
}
new_html_footer($ctx);
@@ -103,7 +101,7 @@ sub new_html_footer {
"<hr><pre>page: $next$latest</pre>";
}
-sub recent_blobs {
+sub recent_msgs {
my ($ctx) = @_;
my $ibx = $ctx->{-inbox};
my $max = $ibx->{feedmax};
@@ -119,7 +117,7 @@ sub recent_blobs {
my $res = $srch->query('', { limit => $max, offset => $o });
my $next = $o + $max;
$ctx->{next_page} = "o=$next" if $res->{total} >= $next;
- return [ map { $_->{blob} } @{$res->{msgs}} ];
+ return $res->{msgs};
}
my $hex = '[a-f0-9]';
@@ -171,7 +169,7 @@ sub recent_blobs {
}
$ctx->{next_page} = "r=$last_commit" if $last_commit;
- \@oids;
+ [ map { bless {blob => $_ }, 'PublicInbox::SearchMsg' } @oids ];
}
1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 666c81d..b1ea8dc 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -8,6 +8,7 @@ use warnings;
use PublicInbox::Git;
use PublicInbox::MID qw(mid2path);
use Devel::Peek qw(SvREFCNT);
+use PublicInbox::MIME;
my $cleanup_timer;
eval {
@@ -246,6 +247,14 @@ sub msg_by_smsg ($$;$) {
$str;
}
+sub smsg_mime {
+ my ($self, $smsg, $ref) = @_;
+ if (my $s = msg_by_smsg($self, $smsg, $ref)) {
+ $smsg->{mime} = PublicInbox::MIME->new($s);
+ return $smsg;
+ }
+}
+
sub path_check {
my ($self, $path) = @_;
git($self)->check('HEAD:'.$path);
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b944868..e314fed 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -6,7 +6,6 @@
package PublicInbox::SearchMsg;
use strict;
use warnings;
-use Search::Xapian;
use PublicInbox::MID qw/mid_clean mid_mime/;
use PublicInbox::Address;
use PublicInbox::MsgTime qw(msg_timestamp msg_datestamp);
@@ -165,9 +164,10 @@ sub mid ($;$) {
$self->{mid} = $mid;
} elsif (my $rv = $self->{mid}) {
$rv;
+ } elsif ($self->{doc}) {
+ $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/);
} else {
- $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
- $self->_extract_mid;
+ $self->_extract_mid; # v1 w/o Xapian
}
}
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 6e537b4..1a8fe7f 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -248,15 +248,14 @@ sub mset_thread {
*PublicInbox::View::pre_thread);
@$msgs = reverse @$msgs if $r;
- my $mime;
sub {
return unless $msgs;
- while ($mime = pop @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
+ my $smsg;
+ while (my $m = pop @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
- if ($mime) {
- $mime = PublicInbox::MIME->new($mime);
- return PublicInbox::View::index_entry($mime, $ctx,
+ if ($smsg) {
+ return PublicInbox::View::index_entry($smsg, $ctx,
scalar @$msgs);
}
$msgs = undef;
@@ -290,8 +289,7 @@ sub adump {
PublicInbox::WwwAtomStream->response($ctx, 200, sub {
while (my $x = shift @items) {
$x = load_doc_retry($srch, $x);
- $x = $ibx->msg_by_smsg($x) and
- return PublicInbox::MIME->new($x);
+ $x = $ibx->smsg_mime($x) and return $x;
}
return undef;
});
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 34ab3e5..5fb2b31 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -9,7 +9,8 @@ use warnings;
use PublicInbox::MsgTime qw(msg_datestamp);
use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
use PublicInbox::Linkify;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mid_escape mids
+ references/;
use PublicInbox::MsgIter;
use PublicInbox::Address;
use PublicInbox::WwwStream;
@@ -69,30 +70,31 @@ sub msg_page {
$more = [ $head, $tail, $db ];
}
});
+ return unless $first;
} else {
$first = $ibx->msg_by_mid($mid) or return;
}
- $first ? msg_html($ctx, PublicInbox::MIME->new($first), $more) : undef;
+ msg_html($ctx, PublicInbox::MIME->new($first), $more);
}
sub msg_html_more {
my ($ctx, $more, $nr) = @_;
my $str = eval {
- my $mref;
+ my $smsg;
my ($head, $tail, $db) = @$more;
- for (; !defined($mref) && $head != $tail; $head++) {
- my $smsg = PublicInbox::SearchMsg->get($head, $db,
- $ctx->{mid});
- next if $smsg->type ne 'mail';
- $mref = $ctx->{-inbox}->msg_by_smsg($smsg);
+ my $mid = $ctx->{mid};
+ for (; !defined($smsg) && $head != $tail; $head++) {
+ my $m = PublicInbox::SearchMsg->get($head, $db, $mid);
+ next if $m->type ne 'mail';
+ $smsg = $ctx->{-inbox}->smsg_mime($m);
}
if ($head == $tail) { # done
@$more = ();
} else {
$more->[0] = $head;
}
- if ($mref) {
- my $mime = PublicInbox::MIME->new($mref);
+ if ($smsg) {
+ my $mime = $smsg->{mime};
_msg_html_prepare($mime->header_obj, $ctx, $more, $nr) .
multipart_text_as_html($mime, '',
$ctx->{-obfs_ibx}) .
@@ -167,14 +169,8 @@ EOF
sub in_reply_to {
my ($hdr) = @_;
- my %mid = map { $_ => 1 } $hdr->header_raw('Message-ID');
- my @refs = (($hdr->header_raw('References') || '') =~ /<([^>]+)>/g);
- push(@refs, (($hdr->header_raw('In-Reply-To') || '') =~ /<([^>]+)>/g));
- while (defined(my $irt = pop @refs)) {
- next if $mid{"<$irt>"};
- return $irt;
- }
- undef;
+ my $refs = references($hdr);
+ $refs->[-1];
}
sub _hdr_names_html ($$) {
@@ -191,12 +187,10 @@ sub nr_to_s ($$$) {
# this is already inside a <pre>
sub index_entry {
- my ($mime, $ctx, $more) = @_;
+ my ($smsg, $ctx, $more) = @_;
my $srch = $ctx->{srch};
- my $hdr = $mime->header_obj;
- my $subj = $hdr->header('Subject');
-
- my $mid_raw = mid_clean(mid_mime($mime));
+ my $subj = $smsg->subject;
+ my $mid_raw = $smsg->mid;
my $id = id_compress($mid_raw, 1);
my $id_m = 'm'.$id;
@@ -211,6 +205,8 @@ sub index_entry {
$rv .= $subj . "\n";
$rv .= _th_index_lite($mid_raw, \$irt, $id, $ctx);
my @tocc;
+ my $mime = $smsg->{mime};
+ my $hdr = $mime->header_obj;
foreach my $f (qw(To Cc)) {
my $dst = _hdr_names_html($hdr, $f);
if ($dst ne '') {
@@ -220,7 +216,7 @@ sub index_entry {
}
my $from = _hdr_names_html($hdr, 'From');
obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx;
- $rv .= "From: $from @ "._msg_date($hdr)." UTC";
+ $rv .= "From: $from @ ".fmt_ts($smsg->ds)." UTC";
my $upfx = $ctx->{-upfx};
my $mhref = $upfx . mid_escape($mid_raw) . '/';
$rv .= qq{ (<a\nhref="$mhref">permalink</a> / };
@@ -363,30 +359,30 @@ sub pre_thread {
}
sub thread_index_entry {
- my ($ctx, $level, $mime) = @_;
+ my ($ctx, $level, $smsg) = @_;
my ($beg, $end) = thread_adj_level($ctx, $level);
- $beg . '<pre>' . index_entry($mime, $ctx, 0) . '</pre>' . $end;
+ $beg . '<pre>' . index_entry($smsg, $ctx, 0) . '</pre>' . $end;
}
sub stream_thread ($$) {
my ($rootset, $ctx) = @_;
my $inbox = $ctx->{-inbox};
- my $mime;
my @q = map { (0, $_) } @$rootset;
my $level;
+ my $smsg;
while (@q) {
$level = shift @q;
my $node = shift @q or next;
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
- $mime = $inbox->msg_by_smsg($node->{smsg}) and last;
+ $smsg = $inbox->smsg_mime($node->{smsg}) and last;
}
- return missing_thread($ctx) unless $mime;
+ return missing_thread($ctx) unless $smsg;
$ctx->{-obfs_ibx} = $inbox->{obfuscate} ? $inbox : undef;
- $mime = PublicInbox::MIME->new($mime);
- $ctx->{-title_html} = ascii_html($mime->header('Subject'));
- $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
+ $ctx->{-title_html} = ascii_html($smsg->subject);
+ $ctx->{-html_tip} = thread_index_entry($ctx, $level, $smsg);
+ $smsg = undef;
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $ctx;
while (@q) {
@@ -394,10 +390,8 @@ sub stream_thread ($$) {
my $node = shift @q or next;
my $cl = $level + 1;
unshift @q, map { ($cl, $_) } @{$node->{children}};
- my $mid = $node->{id};
- if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
- $mime = PublicInbox::MIME->new($mime);
- return thread_index_entry($ctx, $level, $mime);
+ if ($smsg = $inbox->smsg_mime($node->{smsg})) {
+ return thread_index_entry($ctx, $level, $smsg);
} else {
return ghost_index_entry($ctx, $level, $node);
}
@@ -445,24 +439,21 @@ sub thread_html {
return stream_thread($rootset, $ctx) unless $ctx->{flat};
# flat display: lazy load the full message from smsg
- my $mime;
- while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
+ my $smsg;
+ while (my $m = shift @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
- return missing_thread($ctx) unless $mime;
- $mime = PublicInbox::MIME->new($mime);
- $ctx->{-title_html} = ascii_html($mime->header('Subject'));
- $ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
- $mime = undef;
+ return missing_thread($ctx) unless $smsg;
+ $ctx->{-title_html} = ascii_html($smsg->subject);
+ $ctx->{-html_tip} = '<pre>'.index_entry($smsg, $ctx, scalar @$msgs);
+ $smsg = undef;
PublicInbox::WwwStream->response($ctx, 200, sub {
return unless $msgs;
- while ($mime = shift @$msgs) {
- $mime = $inbox->msg_by_smsg($mime) and last;
- }
- if ($mime) {
- $mime = PublicInbox::MIME->new($mime);
- return index_entry($mime, $ctx, scalar @$msgs);
+ $smsg = undef;
+ while (my $m = shift @$msgs) {
+ $smsg = $inbox->smsg_mime($m) and last;
}
+ return index_entry($smsg, $ctx, scalar @$msgs) if $smsg;
$msgs = undef;
$skel;
});
@@ -656,7 +647,7 @@ sub _msg_html_prepare {
sub thread_skel {
my ($dst, $ctx, $hdr, $tpfx) = @_;
my $srch = $ctx->{srch};
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
+ my $mid = mids($hdr)->[0];
my $sres = $srch->get_thread($mid);
my $nr = $sres->{total};
my $expand = qq(expand[<a\nhref="${tpfx}T/#u">flat</a>) .
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f5ed271..a2c2a4a 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -225,6 +225,7 @@ sub get_mid_txt {
sub get_mid_html {
my ($ctx) = @_;
require PublicInbox::View;
+ searcher($ctx);
PublicInbox::View::msg_page($ctx) || r404($ctx);
}
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index bb574a7..38eba2a 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -33,8 +33,8 @@ sub response {
sub getline {
my ($self) = @_;
if (my $middle = $self->{cb}) {
- my $mime = $middle->();
- return feed_entry($self, $mime) if $mime;
+ my $smsg = $middle->();
+ return feed_entry($self, $smsg) if $smsg;
}
delete $self->{cb} ? '</feed>' : undef;
}
@@ -92,10 +92,11 @@ sub mid2uuid ($) {
# returns undef or string
sub feed_entry {
- my ($self, $mime) = @_;
+ my ($self, $smsg) = @_;
my $ctx = $self->{ctx};
+ my $mime = $smsg->{mime};
my $hdr = $mime->header_obj;
- my $mid = mid_clean($hdr->header_raw('Message-ID'));
+ my $mid = $smsg->mid;
my $irt = PublicInbox::View::in_reply_to($hdr);
my $uuid = mid2uuid($mid);
my $base = $ctx->{feed_base_url};
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index eaa3218..2a798d6 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -139,6 +139,32 @@ test_psgi(sub { $www->call(@_) }, sub {
foreach my $mid ('a-mid@b', $new_mid, $third) {
like($raw, qr/<\Q$mid\E>/s, "Message-ID $mid shown");
}
+ like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+ my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
+ $mime->header_set('Message-Id', @$exp);
+ $mime->header_set('Subject', '4th dupe');
+ local $SIG{__WARN__} = sub {};
+ ok($im->add($mime), 'added one message');
+ $im->done;
+ my @h = $mime->header('Message-ID');
+ is_deeply($exp, \@h, 'reused existing Message-ID');
+
+ $config->each_inbox(sub { $_[0]->search->reopen });
+
+ $res = $cb->(GET('/v2test/new.atom'));
+ my @ids = ($res->content =~ m!<id>urn:uuid:([^<]+)</id>!sg);
+ my %ids;
+ $ids{$_}++ for @ids;
+ is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
+
+ $res = $cb->(GET('/v2test/reuse@mid/T/'));
+ $raw = $res->content;
+ like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
+
+ $res = $cb->(GET('/v2test/reuse@mid/t/'));
+ $raw = $res->content;
+ like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
});
done_testing();
--
EW
^ permalink raw reply related [flat|nested] 12+ messages in thread