unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/2] lei rediff: regenerate diffs from stdin
Date: Wed,  5 May 2021 10:46:37 +0000	[thread overview]
Message-ID: <20210505104638.68435-2-e@80x24.org> (raw)
In-Reply-To: <20210505104638.68435-1-e@80x24.org>

Sometimes a mailed patch is generated with non-ideal output
(lacking context, noisy whitespace changes, etc.), or a user
wants to use the same external diff viewer they've configured
git to use.

Since we have SolverGit to regenerate arbitrary blobs from
patches, this new command allows us to regenerate a diff with
different options using the blobs SolverGit gives us.

The number of git-diff(1) options is mind-numbing, so it's
likely I missed some favorites or botched the getopt spec
translation.

This also fixes Inbox::base_url to check psgi.url_scheme
before attempting to generate URLs and avoid uninitialized
variable warnings.  Oddly, the "lei blob" tests did not
trigger these uninitialized warnings.

Note: this will automatically import+index the message(s)
it's regenerating, because solver relies on being able
to look up pre/postimage OIDs and read blobs.
---
 MANIFEST                     |   1 +
 lib/PublicInbox/Inbox.pm     |   2 +-
 lib/PublicInbox/LEI.pm       |  22 ++++
 lib/PublicInbox/LeiInput.pm  |   6 +
 lib/PublicInbox/LeiRediff.pm | 245 +++++++++++++++++++++++++++++++++++
 t/solver_git.t               |   8 +-
 6 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/LeiRediff.pm

diff --git a/MANIFEST b/MANIFEST
index b40147b0..7be07aa5 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -211,6 +211,7 @@ lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
 lib/PublicInbox/LeiQuery.pm
+lib/PublicInbox/LeiRediff.pm
 lib/PublicInbox/LeiRemote.pm
 lib/PublicInbox/LeiSavedSearch.pm
 lib/PublicInbox/LeiSearch.pm
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index da7ea75f..b94ffdb0 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -241,7 +241,7 @@ sub cloneurl {
 
 sub base_url {
 	my ($self, $env) = @_; # env - PSGI env
-	if ($env) {
+	if ($env && $env->{'psgi.url_scheme'}) {
 		my $url = PublicInbox::Git::host_prefix_url($env, '');
 		# for mount in Plack::Builder
 		$url .= '/' if $url !~ m!/\z!;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index c5fdfeb8..9dbbeba9 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -135,6 +135,23 @@ my @lxs_opt = (qw(remote! local! external! include|I=s@ exclude=s@ only=s@
 	import-remote! no-torsocks torsocks=s),
 	PublicInbox::LeiQuery::curl_opt());
 
+# we don't support -C as an alias for --find-copies since it's already
+# used for chdir
+our @diff_opt = qw(unified|U=i output-indicator-new=s output-indicator-old=s
+	output-indicator-context=s indent-heuristic!
+	minimal patience histogram anchored=s@ diff-algorithm=s
+	color-moved=s color-moved-ws=s no-color-moved no-color-moved-ws
+	word-diff:s word-diff-regex=s color-words:s no-renames
+	rename-empty! check ws-error-highlight=s full-index binary
+	abbrev:i break-rewrites|B:s find-renames|M:s find-copies:s
+	find-copies-harder irreversible-delete|D l=i diff-filter=s
+	S=s G=s find-object=s pickaxe-all pickaxe-regex O=s R
+	relative:s text|a ignore-cr-at-eol ignore-space-at-eol
+	ignore-space-change|b ignore-all-space|w ignore-blank-lines
+	inter-hunk-context=i function-context|W exit-code ext-diff
+	no-ext-diff textconv! src-prefix=s dst-prefix=s no-prefix
+	line-prefix=s);
+
 # we generate shell completion + help using %CMD and %OPTDESC,
 # see lei__complete() and PublicInbox::LeiHelp
 # command => [ positional_args, 1-line description, Getopt::Long option spec ]
@@ -162,6 +179,11 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(git-dir=s@ cwd! verbose|v+ mail! oid-a|A=s path-a|a=s path-b|b=s),
 	@lxs_opt, @c_opt ],
 
+'rediff' => [ '[--stdin|LOCATION...]',
+		'regenerate a diff with different options',
+	qw(git-dir=s@ cwd! verbose|v+ color:s no-color),
+	@diff_opt, @lxs_opt, @c_opt ],
+
 'add-external' => [ 'LOCATION',
 	'add/set priority of a publicinbox|extindex for extra matches',
 	qw(boost=i mirror=s no-torsocks torsocks=s inbox-version=i
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 46eea111..87083564 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -69,6 +69,12 @@ error reading $name: $!
 		# but no Content-Length or "From " escaping.
 		# "git format-patch" also generates such files by default.
 		$buf =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
+
+		# a user may feed just a body: git diff | lei rediff -U9
+		if ($self->{-force_eml}) {
+			my $eml = PublicInbox::Eml->new($buf);
+			substr($buf, 0, 0) = "\n\n" if !$eml->{bdy};
+		}
 		$self->input_eml_cb(PublicInbox::Eml->new(\$buf), @args);
 	} else {
 		# prepare_inputs already validated $ifmt
diff --git a/lib/PublicInbox/LeiRediff.pm b/lib/PublicInbox/LeiRediff.pm
new file mode 100644
index 00000000..6c734bef
--- /dev/null
+++ b/lib/PublicInbox/LeiRediff.pm
@@ -0,0 +1,245 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# The "lei rediff" sub-command, regenerates diffs with new options
+package PublicInbox::LeiRediff;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
+use File::Temp 0.19 (); # 0.19 for ->newdir
+use PublicInbox::Spawn qw(spawn which);
+use PublicInbox::MsgIter qw(msg_part_text);
+use PublicInbox::ViewDiff;
+use PublicInbox::LeiBlob;
+use PublicInbox::Git qw(git_quote git_unquote);
+use PublicInbox::Import;
+use PublicInbox::LEI;
+use PublicInbox::SolverGit;
+
+sub rediff_user_cb { # called by solver when done
+	my ($res, $self) = @_; # $self is the "uarg" given to the solver
+	my $lei = $self->{lei};
+	my $log_buf = delete $lei->{log_buf}; # scalar ref set by solve_1
+	$$log_buf =~ s/^/# /sgm; # prefix every log line with "# "
+	ref($res) eq 'ARRAY' or return $lei->child_error(1 << 8, $$log_buf);
+	$lei->qerr($$log_buf);
+	my ($git, $oid, $type, $size, $di) = @$res;
+	my $oid_want = delete $self->{cur_oid_want}; # set by solve_1
+
+	# don't try to support all the git-show(1) options for non-blob,
+	# this is just a convenience:
+	$type ne 'blob' and return $lei->err(<<EOF);
+# $oid is a $type of $size bytes in:
+# $git->{git_dir} (wanted: $oid_want)
+EOF
+	$self->{blob}->{$oid_want} = $oid; # wanted OID => resolved OID
+	push @{$self->{gits}}, $git if $git->{-tmp}; # keep -tmp repos around
+}
+
+# returns the full blob OID for $oid_want; nothing if $oid_want is all zeros
+sub solve_1 ($$$) {
+	my ($self, $oid_want, $hints) = @_;
+	return if $oid_want =~ /\A0+\z/; # all-zero OID: nothing to solve
+	$self->{cur_oid_want} = $oid_want; # consumed by rediff_user_cb
+	my $solver = bless {
+		gits => $self->{gits},
+		user_cb => \&rediff_user_cb,
+		uarg => $self,
+		inboxes => [ $self->{lxs}->locals, @{$self->{rmt}} ],
+	}, 'PublicInbox::SolverGit';
+	open my $log, '+>', \(my $log_buf = '') or die "PerlIO::scalar: $!";
+	$self->{lei}->{log_buf} = \$log_buf; # rediff_user_cb reports this log
+	local $PublicInbox::DS::in_loop = 0; # waitpid synchronously
+	$solver->solve($self->{lei}->{env}, $log, $oid_want, $hints);
+	$self->{blob}->{$oid_want}; # full OID
+}
+
+# regenerates diffs for queued (oid_a, oid_b, path_a, path_b); returns false
+sub diff_ctxq ($$) {
+	my ($self, $ctxq) = @_;
+	return unless $ctxq;
+	my $blob = $self->{blob} //= {}; # cache shared with rediff_user_cb
+	my $ta = <<'EOM';
+reset refs/heads/A
+commit refs/heads/A
+author <a@s> 0 +0000
+committer <c@s> 0 +0000
+data 0
+EOM
+	(my $tb = $ta) =~ tr!A!B!; # same stream, but for refs/heads/B
+	my $lei = $self->{lei};
+	my $wait = delete($self->{-do_done}) ? $lei->{sto}->ipc_do('done') : 0;
+	while (my ($oid_a, $oid_b, $pa, $pb) = splice(@$ctxq, 0, 4)) {
+		my $xa = $blob->{$oid_a} //= solve_1($self, $oid_a,
+							{ path_b => $pa });
+		my $xb = $blob->{$oid_b} //= solve_1($self, $oid_b, {
+						oid_a => $oid_a,
+						path_a => $pa,
+						path_b => $pb
+					});
+		$ta .= "M 100644 $xa ".git_quote($pa)."\n" if $xa;
+		$tb .= "M 100644 $xb ".git_quote($pb)."\n" if $xb;
+	}
+	my $rw = $self->{gits}->[-1]; # has all known alternates
+	if (!$rw->{-tmp}) {
+		my $d = "$self->{rdtmp}/for_tree.git";
+		-d $d or PublicInbox::Import::init_bare($d);
+		my $f = "$d/objects/info/alternates"; # always overwrite
+		open my $fh, '>', $f or die "open $f: $!";
+		for my $git (@{$self->{gits}}) {
+			print $fh $git->git_path('objects'),"\n";
+		}
+		close $fh or die "close $f: $!";
+		$rw = PublicInbox::Git->new($d);
+	}
+	pipe(my ($r, $w)) or die "pipe: $!";
+	my $pid = spawn(['git', "--git-dir=$rw->{git_dir}",
+			qw(fast-import --quiet --done --date-format=raw)],
+			$lei->{env}, { 2 => $lei->{2}, 0 => $r });
+	close $r or die "close r fast-import: $!";
+	print $w $ta, "\n", $tb, "\ndone\n" or die "print fast-import: $!";
+	close $w or die "close w fast-import: $!";
+	waitpid($pid, 0);
+	die "fast-import failed: \$?=$?" if $?;
+
+	my @cmd = qw(diff);
+	my $opt = $lei->{opt};
+	push @cmd, '--'.($opt->{color} && !$opt->{'no-color'} ? '' : 'no-').
+			'color';
+	for my $o (my @spec = @PublicInbox::LEI::diff_opt) { # copy, not alias
+		my $c = $o =~ s/\|([a-z0-9])\b//i ? $1 : undef; # short option
+		if ($o =~ s/=[is]@\z//) {
+			my $v = $opt->{$o} or next;
+			push @cmd, map { $c ? "-$c$_" : "--$o=$_" } @$v;
+		} elsif ($o =~ s/=[is]\z//) {
+			my $v = $opt->{$o} // next;
+			push @cmd, $c ? "-$c$v" : "--$o=$v";
+		} elsif ($o =~ s/:[is]\z//) {
+			my $v = $opt->{$o} // next;
+			push @cmd, $c ? "-$c$v" :
+					($v eq '' ? "--$o" : "--$o=$v");
+		} elsif ($o =~ s/!\z//) {
+			my $v = $opt->{$o} // next;
+			push @cmd, $v ? "--$o" : "--no-$o";
+		} elsif ($opt->{$o}) {
+			push @cmd, $c ? "-$c" : "--$o";
+		}
+	}
+	$lei->qerr("# git @cmd");
+	push @cmd, qw(A B);
+	unshift @cmd, 'git', "--git-dir=$rw->{git_dir}";
+	$pid = spawn(\@cmd, $lei->{env}, { 2 => $lei->{2}, 1 => $lei->{1} });
+	waitpid($pid, 0);
+	$lei->child_error($?) if $?; # for git diff --exit-code
+	undef; # callers store our result in $ctxq: never leak a true value
+}
+
+sub extract_oids { # Eml each_part callback
+	my ($ary, $self) = @_;
+	my ($p, undef, $idx) = @$ary;
+	$self->{lei}->out($p->header_obj->as_string, "\n"); # echo part headers
+	my ($s, undef) = msg_part_text($p, $p->content_type || 'text/plain');
+	defined $s or return; # no text extracted: nothing to rediff
+	my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $s);
+	undef $s; # the pieces now live in @top
+	my $blobs = $self->{blobs}; # blobs to resolve
+	my $ctxq; # flat queue of (oid_a, oid_b, path_a, path_b)
+	while (defined(my $x = shift @top)) {
+		if (scalar(@top) >= 4 &&
+				$top[1] =~ $PublicInbox::ViewDiff::IS_OID &&
+				$top[0] =~ $PublicInbox::ViewDiff::IS_OID) {
+			my ($oid_a, $oid_b, $pa, $pb) = splice(@top, 0, 4);
+			$pa eq '/dev/null' or # else drop the 1st path component
+				$pa = (split(m'/', git_unquote($pa), 2))[1];
+			$pb eq '/dev/null' or # else drop the 1st path component
+				$pb = (split(m'/', git_unquote($pb), 2))[1];
+			$blobs->{$oid_a} //= undef;
+			$blobs->{$oid_b} //= undef;
+			push @$ctxq, $oid_a, $oid_b, $pa, $pb;
+		} elsif ($ctxq) {
+			my @out; # lines of $x which are echoed as-is
+			for (split(/^/sm, $x)) {
+				if (/\A-- \r?\n/s) { # email sig starts
+					push @out, $_;
+					$ctxq = diff_ctxq($self, $ctxq);
+				} elsif ($ctxq && (/\A[\+\- ]/ || /\A@@ / ||
+					# allow totally blank lines w/o leading
+					# SP, git-apply does:
+							/\A\r?\n/s)) {
+					next; # old diff body is not echoed
+				} else {
+					push @out, $_;
+				}
+			}
+			$self->{lei}->out(@out) if @out;
+		} else {
+			$ctxq = diff_ctxq($self, $ctxq); # no-op if nothing queued
+			$self->{lei}->out($x);
+		}
+	}
+	$ctxq = diff_ctxq($self, $ctxq); # flush whatever is still queued
+}
+
+sub input_eml_cb { # callback for all emails
+	my ($self, $eml) = @_;
+	$self->{lei}->{sto}->ipc_do('add_eml', $eml); # hand message to store
+	$self->{-do_done} = 1; # diff_ctxq issues ipc_do('done') when it sees this
+	$eml->each_part(\&extract_oids, $self, 1); # $self is passed to callback
+}
+
+sub lei_rediff { # entry point of the "lei rediff" sub-command
+	my ($lei, @inputs) = @_;
+	$lei->_lei_store(1)->write_prepare($lei);
+	$lei->{opt}->{stdin} = 1 if !@inputs; # no LOCATION given: use stdin
+	$lei->{opt}->{'in-format'} //= 'eml'; # default input format
+	# maybe it's a non-email (code) blob from a coderepo
+	my $git_dirs = $lei->{opt}->{'git-dir'} //= [];
+	if ($lei->{opt}->{cwd} // 1) { # --cwd is on unless negated
+		my $cgd = PublicInbox::LeiBlob::get_git_dir($lei, '.');
+		unshift(@$git_dirs, $cgd) if defined $cgd; # goes first
+	}
+	return $lei->fail('no --git-dir to try') unless @$git_dirs;
+	my $lxs = $lei->lxs_prepare;
+	if ($lxs->remotes) {
+		require PublicInbox::LeiRemote;
+		$lei->{curl} //= which('curl') or return
+			$lei->fail('curl needed for', $lxs->remotes);
+	}
+	$lei->ale->refresh_externals($lxs);
+	my $self = bless {
+		-force_eml => 1, # for LeiInput->input_fh
+		lxs => $lxs,
+	}, __PACKAGE__;
+	$self->prepare_inputs($lei, \@inputs) or return;
+	my $isatty = -t $lei->{1};
+	$lei->{opt}->{color} //= $isatty; # color defaults to on for a tty
+	$lei->start_pager if $isatty;
+	my ($op_c, $ops) = $lei->workers_start($self, 1);
+	$lei->{wq1} = $self;
+	net_merge_all_done($self) unless $lei->{auth};
+	$op_c->op_wait_event($ops);
+}
+
+sub ipc_atfork_child { # worker setup, presumably run after fork
+	my ($self) = @_;
+	PublicInbox::LeiInput::input_only_atfork_child(@_);
+	my $lei = $self->{lei};
+	$lei->{1}->autoflush(1);
+	binmode $lei->{1}, ':utf8';
+	$self->{blobs} = {}; # oidhex => filename
+	$self->{rdtmp} = File::Temp->newdir('lei-rediff-XXXX', TMPDIR => 1);
+	$self->{rmt} = [ map { # one LeiRemote per remote external
+			PublicInbox::LeiRemote->new($lei, $_)
+		} $self->{lxs}->remotes ];
+	$self->{gits} = [ map { # one Git object per --git-dir
+			PublicInbox::Git->new($lei->rel2abs($_))
+		} @{$self->{lei}->{opt}->{'git-dir'}} ];
+	$lei->{env}->{'psgi.errors'} = $lei->{2}; # ugh...
+	$lei->{env}->{TMPDIR} = $self->{rdtmp}->dirname; # env goes to spawn()
+	undef;
+}
+
+no warnings 'once';
+*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done;
+*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all;
+1;
diff --git a/t/solver_git.t b/t/solver_git.t
index 75387b2a..e566efb3 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -32,7 +32,7 @@ my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16);
 my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0';
 my $non_existent = 'ee5e32211bf62ab6531bdf39b84b6920d0b6775a';
 
-test_lei({tmpdir => $tmpdir}, sub {
+test_lei({tmpdir => "$tmpdir/blob"}, sub {
 	lei_ok('blob', '--mail', $patch2_oid, '-I', $ibx->{inboxdir},
 		\'--mail works for existing oid');
 	is($lei_out, $patch2->as_string, 'blob matches');
@@ -64,6 +64,12 @@ test_lei({tmpdir => $tmpdir}, sub {
 	lei_ok('blob', $v1_0_0_tag_short, '-I', $ibx->{inboxdir});
 });
 
+test_lei({tmpdir => "$tmpdir/rediff"}, sub {
+	lei_ok(qw(rediff -q -U9 t/solve/0001-simple-mod.patch));
+	like($lei_out, qr!^\Q+++\E b/TODO\n@@ -103,9 \+103,11 @@!sm,
+		'got more context with -U9');
+});
+
 my $git = PublicInbox::Git->new($git_dir);
 $ibx->{-repo_objs} = [ $git ];
 my $res;

  reply	other threads:[~2021-05-05 10:46 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-05 10:46 [PATCH 0/2] lei rediff + solver-related fix Eric Wong
2021-05-05 10:46 ` Eric Wong [this message]
2021-05-05 10:46 ` [PATCH 2/2] lei blob: support "lei index"-ed mail Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210505104638.68435-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).