unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] www: /$INBOX/$MSGID/d/ to diff reused Message-IDs
@ 2023-01-11 10:55 Eric Wong
  2023-01-11 11:00 ` [1/2 PATCH] hoist MailDiff and ContentDigestDbg out of lei Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2023-01-11 10:55 UTC (permalink / raw)
  To: meta

To ensure users aren't abusing the ability to reuse Message-IDs,
provide a convenient front-end to `lei mail-diff' from WWW.
Most of the time it's just list-appended signatures, so I expect
this to be useful for /all/ users.
---
 lib/PublicInbox/Hval.pm     |  2 +-
 lib/PublicInbox/MailDiff.pm | 88 +++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/View.pm     | 29 +++++++++++-
 lib/PublicInbox/WWW.pm      |  6 ++-
 t/psgi_v2.t                 |  3 ++
 5 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 00b3c8b4..0677865e 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -118,7 +118,7 @@ $ESCAPES{'/'} = ':'; # common
 sub to_attr ($) {
 	my ($str) = @_;
 
-	# git would never do this to us:
+	# git would never do this to us, mail diff uses // to prevent anchors:
 	return if index($str, '//') >= 0;
 
 	my $first = '';
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
index 06eb3a0d..0ed06f9a 100644
--- a/lib/PublicInbox/MailDiff.pm
+++ b/lib/PublicInbox/MailDiff.pm
@@ -7,6 +7,8 @@ use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::ContentDigestDbg;
 use Data::Dumper ();
 use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff qw(flush_diff);
+use PublicInbox::GitAsyncCat;
 
 sub write_part { # Eml->each_part callback
 	my ($ary, $self) = @_;
@@ -31,6 +33,9 @@ sub dump_eml ($$$) {
 	mkdir $dir or die "mkdir($dir): $!";
 	$eml->each_part(\&write_part, $self);
 
+	return if $self->{ctx}; # don't need content_digest noise in WWW UI
+
+	# XXX is this even useful?  perhaps hide it behind a CLI switch
 	open my $fh, '>', "$dir/content_digest" or die "open: $!";
 	my $dig = PublicInbox::ContentDigestDbg->new($fh);
 	local $Data::Dumper::Useqq = 1;
@@ -47,4 +52,87 @@ sub prep_a ($$) {
 	dump_eml($self, "$self->{tmp}/a", $eml);
 }
 
+sub next_smsg ($) {
+	my ($self) = @_;
+	my $ctx = $self->{ctx};
+	my $over = $ctx->{ibx}->over;
+	$self->{smsg} = $over ? $over->next_by_mid(@{$self->{next_arg}})
+			: $ctx->gone('over');
+	if (!$self->{smsg}) {
+		$ctx->write($ctx->_html_end);
+		return $ctx->close;
+	}
+	my $async = $self->{ctx}->{env}->{'pi-httpd.async'};
+	$async->(undef, undef, $self) if $async # PublicInbox::HTTPD::Async->new
+}
+
+sub emit_msg_diff {
+	my ($bref, $self) = @_; # bref is `git diff' output
+	# will be escaped to `•' in HTML
+	$self->{ctx}->{ibx}->{obfuscate} and
+		obfuscate_addrs($self->{ctx}->{ibx}, $$bref, "\x{2022}");
+	$$bref =~ s/\r+\n/\n/sg;
+	print { $self->{ctx}->{zfh} } '</pre><hr><pre>' if $self->{nr} > 1;
+	flush_diff($self->{ctx}, $bref);
+	next_smsg($self);
+}
+
+sub do_diff {
+	my ($self, $eml) = @_;
+	my $n = 'N'.(++$self->{nr});
+	my $dir = "$self->{tmp}/$n";
+	$self->dump_eml($dir, $eml);
+	my $cmd = [ qw(git diff --no-index --no-color -- a), $n ];
+	my $opt = { -C => "$self->{tmp}", quiet => 1 };
+	my $qsp = PublicInbox::Qspawn->new($cmd, undef, $opt);
+	$qsp->psgi_qx($self->{ctx}->{env}, undef, \&emit_msg_diff, $self);
+}
+
+sub diff_msg_i {
+	my ($self, $eml) = @_;
+	if ($eml) {
+		if ($self->{tmp}) { # 2nd..last message
+			do_diff($self, $eml);
+		} else { # first message:
+			prep_a($self, $eml);
+			next_smsg($self);
+		}
+	} else {
+		warn "W: $self->{smsg}->{blob} missing\n";
+		next_smsg($self);
+	}
+}
+
+sub diff_msg_i_async {
+	my ($bref, $oid, $type, $size, $self) = @_;
+	diff_msg_i($self, $bref ? PublicInbox::Eml->new($bref) : undef);
+}
+
+sub event_step {
+	my ($self) = @_;
+	eval {
+		my $ctx = $self->{ctx};
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			ibx_async_cat($ctx->{ibx}, $self->{smsg}->{blob},
+					\&diff_msg_i_async, $self);
+		} else {
+			diff_msg_i($self, $ctx->{ibx}->smsg_eml($self->{smsg}));
+		}
+	};
+	if ($@) {
+		warn "E: $@";
+		delete $self->{smsg};
+		$self->{ctx}->close;
+	}
+}
+
+sub begin_mail_diff {
+	my ($self) = @_;
+	if (my $async = $self->{ctx}->{env}->{'pi-httpd.async'}) {
+		$async->(undef, undef, $self); # PublicInbox::HTTPD::Async->new
+	} else {
+		event_step($self) while $self->{smsg};
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 071a2093..b8d6d85e 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -623,7 +623,8 @@ sub _msg_page_prepare {
 			return;
 		}
 		$ctx->{-html_tip} =
-"<pre>WARNING: multiple messages have this Message-ID\n</pre><pre>";
+qq[<pre>WARNING: multiple messages have this Message-ID (<a
+href="d/">diff</a>)</pre><pre>];
 	} else {
 		$ctx->{first_hdr} = $eml->header_obj;
 		$ctx->{chash} = content_hash($eml) if $ctx->{smsg}; # reused MID
@@ -1225,4 +1226,30 @@ sub ghost_index_entry {
 		. '</pre>' . $end;
 }
 
+# /$INBOX/$MSGID/d/ endpoint
+sub diff_msg {
+	my ($ctx) = @_;
+	require PublicInbox::MailDiff;
+	my $ibx = $ctx->{ibx};
+	my $over = $ibx->over or return no_over_html($ctx);
+	my ($id, $prev);
+	my $md = bless { ctx => $ctx }, 'PublicInbox::MailDiff';
+	my $next_arg = $md->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
+	my $smsg = $md->{smsg} = $over->next_by_mid(@$next_arg) or
+		return; # undef == 404
+	$ctx->{-t_max} = $smsg->{ts};
+	$ctx->{-upfx} = '../../';
+	$ctx->{-apfx} = '//'; # fail on to_attr()
+	$ctx->{-linkify} = PublicInbox::Linkify->new;
+	my $mid = ascii_html($smsg->{mid});
+	$ctx->{-title_html} = "diff for duplicates of &lt;$mid&gt;";
+	PublicInbox::WwwStream::html_init($ctx);
+	print { $ctx->{zfh} } '<pre>diff for duplicates of &lt;<a href="../">',
+				$mid, "</a>&gt;\n\n";
+	sub {
+		$ctx->attach($_[0]->([200, delete $ctx->{-res_hdr}]));
+		$md->begin_mail_diff;
+	};
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f861b192..9ffcb879 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -25,7 +25,7 @@ use PublicInbox::Eml;
 # TODO: consider a routing tree now that we have more endpoints:
 our $INBOX_RE = qr!\A/([\w\-][\w\.\-\+]*)!;
 our $MID_RE = qr!([^/]+)!;
-our $END_RE = qr!(T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
+our $END_RE = qr!(T/|t/|d/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
 our $ATTACH_RE = qr!([0-9][0-9\.]*)-($PublicInbox::Hval::FN)!;
 our $OID_RE = qr![a-f0-9]{7,}!;
 
@@ -452,6 +452,10 @@ sub msg_page {
 
 	# legacy, but no redirect for compatibility:
 	'f/' eq $e and return get_mid_html($ctx);
+	if ($e eq 'd/') {
+		require PublicInbox::View;
+		return PublicInbox::View::diff_msg($ctx);
+	}
 	r404($ctx);
 }
 
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 6b1b3a39..f709c3c7 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -220,6 +220,9 @@ my $client1 = sub {
 		like($raw, qr!>\Q$mid\E</a>!s, "Message-ID $mid shown");
 	}
 	like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
+
+	$res = $cb->(GET('/v2test/a-mid@b/d/'));
+	is($res->code, 200, '/d/ (diff) endpoint works');
 };
 
 test_psgi(sub { $www->call(@_) }, $client1);

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [1/2 PATCH] hoist MailDiff and ContentDigestDbg out of lei
  2023-01-11 10:55 [PATCH] www: /$INBOX/$MSGID/d/ to diff reused Message-IDs Eric Wong
@ 2023-01-11 11:00 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2023-01-11 11:00 UTC (permalink / raw)
  To: meta

These will be reused in the web UI, too.
---
 <20230111105539.302803-1-e@80x24.org> was actually [2/2] of
 this series.  My mind drifted and I thought it was just one
 patch :x

 MANIFEST                            |  3 ++
 lib/PublicInbox/ContentDigestDbg.pm | 17 +++++++
 lib/PublicInbox/LeiMailDiff.pm      | 71 +++--------------------------
 lib/PublicInbox/MailDiff.pm         | 50 ++++++++++++++++++++
 t/lei-mail-diff.t                   | 14 ++++++
 5 files changed, 91 insertions(+), 64 deletions(-)
 create mode 100644 lib/PublicInbox/ContentDigestDbg.pm
 create mode 100644 lib/PublicInbox/MailDiff.pm
 create mode 100644 t/lei-mail-diff.t

diff --git a/MANIFEST b/MANIFEST
index 565317ce..3626e4d2 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -163,6 +163,7 @@ lib/PublicInbox/CmdIPC4.pm
 lib/PublicInbox/CompressNoop.pm
 lib/PublicInbox/Config.pm
 lib/PublicInbox/ConfigIter.pm
+lib/PublicInbox/ContentDigestDbg.pm
 lib/PublicInbox/ContentHash.pm
 lib/PublicInbox/DS.pm
 lib/PublicInbox/DSKQXS.pm
@@ -280,6 +281,7 @@ lib/PublicInbox/Lock.pm
 lib/PublicInbox/MDA.pm
 lib/PublicInbox/MID.pm
 lib/PublicInbox/MIME.pm
+lib/PublicInbox/MailDiff.pm
 lib/PublicInbox/ManifestJsGz.pm
 lib/PublicInbox/Mbox.pm
 lib/PublicInbox/MboxGz.pm
@@ -478,6 +480,7 @@ t/lei-import.t
 t/lei-index.t
 t/lei-inspect.t
 t/lei-lcat.t
+t/lei-mail-diff.t
 t/lei-mirror.psgi
 t/lei-mirror.t
 t/lei-p2q.t
diff --git a/lib/PublicInbox/ContentDigestDbg.pm b/lib/PublicInbox/ContentDigestDbg.pm
new file mode 100644
index 00000000..425e8589
--- /dev/null
+++ b/lib/PublicInbox/ContentDigestDbg.pm
@@ -0,0 +1,17 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest
+use v5.12;
+use Data::Dumper;
+use Digest::SHA;
+
+sub new { bless { dig => Digest::SHA->new(256), fh => $_[1] }, __PACKAGE__ }
+
+sub add {
+	$_[0]->{dig}->add($_[1]);
+	print { $_[0]->{fh} } Dumper([split(/^/sm, $_[1])]) or die "print $!";
+}
+
+sub hexdigest { $_[0]->{dig}->hexdigest; }
+
+1;
diff --git a/lib/PublicInbox/LeiMailDiff.pm b/lib/PublicInbox/LeiMailDiff.pm
index 2b4cfd9e..c813144f 100644
--- a/lib/PublicInbox/LeiMailDiff.pm
+++ b/lib/PublicInbox/LeiMailDiff.pm
@@ -4,59 +4,16 @@
 # The "lei mail-diff" sub-command, diffs input contents against
 # the first message of input
 package PublicInbox::LeiMailDiff;
-use strict;
-use v5.10.1;
-use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
-use File::Temp 0.19 (); # 0.19 for ->newdir
+use v5.12;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput PublicInbox::MailDiff);
 use PublicInbox::Spawn qw(spawn which);
-use PublicInbox::MsgIter qw(msg_part_text);
-use File::Path qw(remove_tree);
-use PublicInbox::ContentHash qw(content_digest);
+use File::Path ();
 require PublicInbox::LeiRediff;
-use Data::Dumper ();
-
-sub write_part { # Eml->each_part callback
-	my ($ary, $self) = @_;
-	my ($part, $depth, $idx) = @$ary;
-	if ($idx ne '1' || $self->{lei}->{opt}->{'raw-header'}) {
-		open my $fh, '>', "$self->{curdir}/$idx.hdr" or die "open: $!";
-		print $fh ${$part->{hdr}} or die "print $!";
-		close $fh or die "close $!";
-	}
-	my $ct = $part->content_type || 'text/plain';
-	my ($s, $err) = msg_part_text($part, $ct);
-	my $sfx = defined($s) ? 'txt' : 'bin';
-	open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!";
-	print $fh ($s // $part->body) or die "print $!";
-	close $fh or die "close $!";
-}
-
-sub dump_eml ($$$) {
-	my ($self, $dir, $eml) = @_;
-	local $self->{curdir} = $dir;
-	mkdir $dir or die "mkdir($dir): $!";
-	$eml->each_part(\&write_part, $self);
-
-	open my $fh, '>', "$dir/content_digest" or die "open: $!";
-	my $dig = PublicInbox::ContentDigestDbg->new($fh);
-	local $Data::Dumper::Useqq = 1;
-	local $Data::Dumper::Terse = 1;
-	content_digest($eml, $dig);
-	print $fh "\n", $dig->hexdigest, "\n" or die "print $!";
-	close $fh or die "close: $!";
-}
-
-sub prep_a ($$) {
-	my ($self, $eml) = @_;
-	$self->{tmp} = File::Temp->newdir('lei-mail-diff-XXXX', TMPDIR => 1);
-	dump_eml($self, "$self->{tmp}/a", $eml);
-}
 
 sub diff_a ($$) {
 	my ($self, $eml) = @_;
-	++$self->{nr};
-	my $dir = "$self->{tmp}/N$self->{nr}";
-	dump_eml($self, $dir, $eml);
+	my $dir = "$self->{tmp}/N".(++$self->{nr});
+	$self->dump_eml($dir, $eml);
 	my $cmd = [ qw(git diff --no-index) ];
 	my $lei = $self->{lei};
 	PublicInbox::LeiRediff::_lei_diff_prepare($lei, $cmd);
@@ -71,7 +28,7 @@ sub diff_a ($$) {
 
 sub input_eml_cb { # used by PublicInbox::LeiInput::input_fh
 	my ($self, $eml) = @_;
-	$self->{tmp} ? diff_a($self, $eml) : prep_a($self, $eml);
+	$self->{tmp} ? diff_a($self, $eml) : $self->prep_a($eml);
 }
 
 sub lei_mail_diff {
@@ -82,24 +39,10 @@ sub lei_mail_diff {
 	$lei->{opt}->{color} //= $isatty;
 	$lei->start_pager if $isatty;
 	$lei->{-err_type} = 'non-fatal';
+	$self->{-raw_hdr} = $lei->{opt}->{'raw-header'};
 	$lei->wq1_start($self);
 }
 
 no warnings 'once';
 *net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
-
-package PublicInbox::ContentDigestDbg; # cf. PublicInbox::ContentDigest
-use strict;
-use v5.10.1;
-use Data::Dumper;
-
-sub new { bless { dig => Digest::SHA->new(256), fh => $_[1] }, __PACKAGE__ }
-
-sub add {
-	$_[0]->{dig}->add($_[1]);
-	print { $_[0]->{fh} } Dumper([split(/^/sm, $_[1])]) or die "print $!";
-}
-
-sub hexdigest { $_[0]->{dig}->hexdigest; }
-
 1;
diff --git a/lib/PublicInbox/MailDiff.pm b/lib/PublicInbox/MailDiff.pm
new file mode 100644
index 00000000..06eb3a0d
--- /dev/null
+++ b/lib/PublicInbox/MailDiff.pm
@@ -0,0 +1,50 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::MailDiff;
+use v5.12;
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use PublicInbox::ContentHash qw(content_digest);
+use PublicInbox::ContentDigestDbg;
+use Data::Dumper ();
+use PublicInbox::MsgIter qw(msg_part_text);
+
+sub write_part { # Eml->each_part callback
+	my ($ary, $self) = @_;
+	my ($part, $depth, $idx) = @$ary;
+	if ($idx ne '1' || $self->{-raw_hdr}) {
+		open my $fh, '>', "$self->{curdir}/$idx.hdr" or die "open: $!";
+		print $fh ${$part->{hdr}} or die "print $!";
+		close $fh or die "close $!";
+	}
+	my $ct = $part->content_type || 'text/plain';
+	my ($s, $err) = msg_part_text($part, $ct);
+	my $sfx = defined($s) ? 'txt' : 'bin';
+	open my $fh, '>', "$self->{curdir}/$idx.$sfx" or die "open: $!";
+	print $fh ($s // $part->body) or die "print $!";
+	close $fh or die "close $!";
+}
+
+# public
+sub dump_eml ($$$) {
+	my ($self, $dir, $eml) = @_;
+	local $self->{curdir} = $dir;
+	mkdir $dir or die "mkdir($dir): $!";
+	$eml->each_part(\&write_part, $self);
+
+	open my $fh, '>', "$dir/content_digest" or die "open: $!";
+	my $dig = PublicInbox::ContentDigestDbg->new($fh);
+	local $Data::Dumper::Useqq = 1;
+	local $Data::Dumper::Terse = 1;
+	content_digest($eml, $dig);
+	print $fh "\n", $dig->hexdigest, "\n" or die "print $!";
+	close $fh or die "close: $!";
+}
+
+# public
+sub prep_a ($$) {
+	my ($self, $eml) = @_;
+	$self->{tmp} = File::Temp->newdir('mail-diff-XXXX', TMPDIR => 1);
+	dump_eml($self, "$self->{tmp}/a", $eml);
+}
+
+1;
diff --git a/t/lei-mail-diff.t b/t/lei-mail-diff.t
new file mode 100644
index 00000000..9398596a
--- /dev/null
+++ b/t/lei-mail-diff.t
@@ -0,0 +1,14 @@
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use v5.12; use PublicInbox::TestCommon;
+
+test_lei(sub {
+	ok(!lei('mail-diff', 't/data/0001.patch', 't/data/binary.patch'),
+		'different messages are different');
+	like($lei_out, qr/^\+/m, 'diff shown');
+	lei_ok('mail-diff', 't/data/0001.patch', 't/data/0001.patch');
+	is($lei_out, '', 'no output if identical');
+});
+
+done_testing;

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-01-11 11:01 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-11 10:55 [PATCH] www: /$INBOX/$MSGID/d/ to diff reused Message-IDs Eric Wong
2023-01-11 11:00 ` [1/2 PATCH] hoist MailDiff and ContentDigestDbg out of lei Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).