From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] www: /$INBOX/$MSGID/d/ to diff reused Message-IDs
Date: Wed, 11 Jan 2023 10:55:39 +0000 [thread overview]
Message-ID: <20230111105539.302803-1-e@80x24.org> (raw)
To ensure users aren't abusing the ability to reuse Message-IDs,
provide a convenient front-end to `lei mail-diff' from WWW.
Most of the time it's just list-appended signatures, so I expect
this to be useful for /all/ users.
---
lib/PublicInbox/Hval.pm | 2 +-
lib/PublicInbox/MailDiff.pm | 88 +++++++++++++++++++++++++++++++++++++
lib/PublicInbox/View.pm | 29 +++++++++++-
lib/PublicInbox/WWW.pm | 6 ++-
t/psgi_v2.t | 3 ++
5 files changed, 125 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 00b3c8b4..0677865e 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -118,7 +118,7 @@ $ESCAPES{'/'} = ':'; # common
sub to_attr ($) {
my ($str) = @_;
- # git would never do this to us:
+ # git would never do this to us, mail diff uses // to prevent anchors:
return if index($str, '//') >= 0;
my $first = '';
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 06eb3a0d..0ed06f9a 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -7,6 +7,8 @@ use PublicInbox::ContentHash qw(content_digest);
use PublicInbox::ContentDigestDbg;
use Data::Dumper ();
use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::GitAsyncCat;
sub write_part { # Eml->each_part callback
my ($ary, $self) = @_;
@@ -31,6 +33,9 @@ sub dump_eml ($$$) {
mkdir $dir or die "mkdir($dir): $!";
$eml->each_part(\&write_part, $self);
+ return if $self->{ctx}; # don't need content_digest noise in WWW UI
+
+ # XXX is this even useful? perhaps hide it behind a CLI switch
open my $fh, '>', "$dir/content_digest" or die "open: $!";
my $dig = PublicInbox::ContentDigestDbg->new($fh);
local $Data::Dumper::Useqq = 1;
@@ -47,4 +52,87 @@ sub prep_a ($$) {
dump_eml($self, "$self->{tmp}/a", $eml);
}
+# Advance $self->{smsg} to the next message sharing the Message-ID held
+# in $self->{next_arg}.  When nothing is left, the HTML footer is
+# written and the response is closed; otherwise, if the server exposes
+# the `pi-httpd.async' callback, $self is handed to it (the original
+# note says this is PublicInbox::HTTPD::Async->new) -- presumably so
+# that event_step() gets invoked for the new smsg; confirm against
+# PublicInbox::HTTPD::Async.
+sub next_smsg ($) {
+	my ($self) = @_;
+	my $ctx = $self->{ctx};
+	if (my $over = $ctx->{ibx}->over) {
+		$self->{smsg} = $over->next_by_mid(@{$self->{next_arg}});
+	} else {
+		# presumably reports the missing over index and yields
+		# a false value -- verify against $ctx->gone
+		$self->{smsg} = $ctx->gone('over');
+	}
+	unless ($self->{smsg}) { # no more duplicates (or no over)
+		$ctx->write($ctx->_html_end);
+		return $ctx->close;
+	}
+	my $httpd_async = $ctx->{env}->{'pi-httpd.async'};
+	$httpd_async->(undef, undef, $self) if $httpd_async;
+}
+
+# Completion callback passed to Qspawn->psgi_qx by do_diff().
+#   $bref - scalar ref to the `git diff' output; edited in place
+#   $self - the PublicInbox::MailDiff object driving this response
+# Writes the (optionally address-obfuscated) diff to the response
+# stream via flush_diff(), then moves on to the next duplicate.
+sub emit_msg_diff {
+	my ($bref, $self) = @_; # bref is `git diff' output
+	my $ctx = $self->{ctx};
+	if ($ctx->{ibx}->{obfuscate}) {
+		# Fully-qualified on purpose: the `use' lines visible for
+		# this module do not import obfuscate_addrs, so a bare
+		# call would die with "Undefined subroutine" the first
+		# time an obfuscating inbox is diffed.
+		require PublicInbox::Hval;
+		# "\x{2022}" will be escaped to `&#8226;' in HTML
+		PublicInbox::Hval::obfuscate_addrs($ctx->{ibx}, $$bref,
+						"\x{2022}");
+	}
+	$$bref =~ s/\r+\n/\n/sg; # normalize CRLF line endings to LF
+	# separate the 2nd and later diffs from the previous one
+	print { $ctx->{zfh} } '</pre><hr><pre>' if $self->{nr} > 1;
+	flush_diff($ctx, $bref);
+	next_smsg($self);
+}
+
+# Dump $eml into a fresh "N<nr>" directory under $self->{tmp} and spawn
+# `git diff --no-index' comparing it against the "a" directory, which
+# prep_a() filled from the first message.  emit_msg_diff() receives
+# the diff output once the command completes.
+sub do_diff {
+	my ($self, $eml) = @_;
+	# Loaded here since none of the `use' lines visible for this
+	# module pull in Qspawn; require is a no-op if already loaded.
+	require PublicInbox::Qspawn;
+	my $n = 'N'.(++$self->{nr});
+	my $dir = "$self->{tmp}/$n";
+	$self->dump_eml($dir, $eml);
+	# relative paths ("a" and $n) resolve against the -C directory
+	my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+	my $opt = { -C => "$self->{tmp}", quiet => 1 };
+	my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
+	$qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
+}
+
+# Handle one loaded message ($eml), or its absence (false $eml):
+#   - blob missing: warn and skip ahead to the next duplicate
+#   - $self->{tmp} already set (2nd..last message): diff against "a"
+#   - otherwise (first message): prep_a() dumps it as the "a" side,
+#     then continue with the next duplicate
+sub diff_msg_i {
+	my ($self, $eml) = @_;
+	if (!$eml) {
+		warn "W: $self->{smsg}->{blob} missing\n";
+		return next_smsg($self);
+	}
+	# 2nd..last message
+	return do_diff($self, $eml) if $self->{tmp};
+
+	# first message:
+	prep_a($self, $eml);
+	next_smsg($self);
+}
+
+# ibx_async_cat callback: wraps the blob contents ($bref) in a
+# PublicInbox::Eml, or passes undef through when the blob could not
+# be read, and hands the result to diff_msg_i().  The oid/type/size
+# arguments are unused here.
+sub diff_msg_i_async {
+	my ($bref, undef, undef, undef, $self) = @_;
+	my $eml = $bref ? PublicInbox::Eml->new($bref) : undef;
+	diff_msg_i($self, $eml);
+}
+
+# Load the message for the current $self->{smsg} and feed it to
+# diff_msg_i(): through ibx_async_cat when the server provides
+# `pi-httpd.async', otherwise via a direct smsg_eml() call.  Any
+# exception is logged, $self->{smsg} is dropped (which also ends the
+# loop in begin_mail_diff()) and the response is closed.
+sub event_step {
+	my ($self) = @_;
+	eval {
+		my $ctx = $self->{ctx};
+		my $ibx = $ctx->{ibx};
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			ibx_async_cat($ibx, $self->{smsg}->{blob},
+					\&diff_msg_i_async, $self);
+		} else {
+			diff_msg_i($self, $ibx->smsg_eml($self->{smsg}));
+		}
+	};
+	if (my $err = $@) {
+		warn "E: $err";
+		delete $self->{smsg};
+		$self->{ctx}->close;
+	}
+}
+
+# Entry point used by PublicInbox::View::diff_msg once the response
+# has been started.  With `pi-httpd.async' available, $self is handed
+# to that callback (PublicInbox::HTTPD::Async->new per the original
+# note); otherwise event_step() is called in a loop until
+# $self->{smsg} is no longer set.
+sub begin_mail_diff {
+	my ($self) = @_;
+	my $httpd_async = $self->{ctx}->{env}->{'pi-httpd.async'};
+	return $httpd_async->(undef, undef, $self) if $httpd_async;
+
+	# generic PSGI server: drive every step synchronously
+	event_step($self) while $self->{smsg};
+}
+
1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 071a2093..b8d6d85e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -623,7 +623,8 @@ sub _msg_page_prepare {
return;
}
$ctx->{-html_tip} =
-"<pre>WARNING: multiple messages have this Message-ID\n</pre><pre>";
+qq[<pre>WARNING: multiple messages have this Message-ID (<a
+href="d/">diff</a>)</pre><pre>];
} else {
$ctx->{first_hdr} = $eml->header_obj;
$ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
@@ -1225,4 +1226,30 @@ sub ghost_index_entry {
. '</pre>' . $end;
}
+# /$INBOX/$MSGID/d/ endpoint
+# Shows diffs between all messages sharing the Message-ID in
+# $ctx->{mid}.  Returns the no_over_html() response when the inbox has
+# no over index, undef (treated as a 404 by the caller) when no
+# message matches, and otherwise a PSGI delayed-response callback which
+# starts the 200 response and lets PublicInbox::MailDiff stream the
+# diffs.
+sub diff_msg {
+	my ($ctx) = @_;
+	require PublicInbox::MailDiff;
+	my $ibx = $ctx->{ibx};
+	my $over = $ibx->over or return no_over_html($ctx);
+	my ($id, $prev);
+	my $md = bless { ctx => $ctx }, 'PublicInbox::MailDiff';
+	# next_by_mid keeps its iteration state in $id/$prev through these
+	# refs, so MailDiff::next_smsg can resume with the same array
+	my $next_arg = $md->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
+	my $smsg = $md->{smsg} = $over->next_by_mid(@$next_arg) or
+		return; # undef == 404
+	$ctx->{-t_max} = $smsg->{ts};
+	$ctx->{-upfx} = '../../';
+	$ctx->{-apfx} = '//'; # fail on to_attr()
+	$ctx->{-linkify} = PublicInbox::Linkify->new;
+	my $mid = ascii_html($smsg->{mid});
+	# the angle brackets around the Message-ID must be entities,
+	# otherwise browsers parse "<$mid>" as a tag and hide it
+	$ctx->{-title_html} = "diff for duplicates of &lt;$mid&gt;";
+	PublicInbox::WwwStream::html_init($ctx);
+	print { $ctx->{zfh} } '<pre>diff for duplicates of &lt;<a href="../">',
+				$mid, "</a>&gt;\n\n";
+	sub {
+		# $_[0] is the PSGI responder; attach its writer to $ctx
+		$ctx->attach($_[0]->([200, delete $ctx->{-res_hdr}]));
+		$md->begin_mail_diff;
+	};
+}
+
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f861b192..9ffcb879 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -25,7 +25,7 @@ use PublicInbox::Eml;
# TODO: consider a routing tree now that we have more endpoints:
our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
our $OID_RE = qr![a-f0-9]{7,}!;
@@ -452,6 +452,10 @@ sub msg_page {
# legacy, but no redirect for compatibility:
'f/' eq $e and return get_mid_html($ctx);
+ if ($e eq 'd/') {
+ require PublicInbox::View;
+ return PublicInbox::View::diff_msg($ctx);
+ }
r404($ctx);
}
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 6b1b3a39..f709c3c7 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -220,6 +220,9 @@ my $client1 = sub {
like($raw, qr!>\Q$mid\E</a>!s, "Message-ID $mid shown");
}
like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+ $res = $cb->(GET('/v2test/a-mid@b/d/'));
+ is($res->code, 200, '/d/ (diff) endpoint works');
};
test_psgi(sub { $www->call(@_) }, $client1);
next reply other threads:[~2023-01-11 10:55 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-11 10:55 Eric Wong [this message]
2023-01-11 11:00 ` [1/2 PATCH] hoist MailDiff and ContentDigestDbg out of lei Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230111105539.302803-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).