unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect
@ 2023-01-12 14:14 Eric Wong
  2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
  To: meta

I much prefer $REPO/$OID/s/ URLs since they're stable, but we'll
need to handle /tree/ when migrating from cgit.

3/3 adds some git-aware 404 handling for deleted or renamed files;
hopefully it's not too expensive.  It might be a first...

For example, a recent deletion:
https://80x24.org/lore/pub/scm/git/git.git/tree/git-bisect.sh

vs. one from 2005:
https://80x24.org/lore/pub/scm/git/git.git/tree/Documentation/git-mkdelta.txt

Eric Wong (3):
  www_stream: coderepo-specific top bar
  www_coderepo: /tree/ redirects to /$OID/s/
  www_coderepo: /tree/ 404s search git history

 MANIFEST                       |  1 +
 lib/PublicInbox/GitAsyncCat.pm |  2 +-
 lib/PublicInbox/RepoTree.pm    | 84 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/ViewVCS.pm     | 23 ++++++----
 lib/PublicInbox/WwwCoderepo.pm | 15 +++---
 lib/PublicInbox/WwwStream.pm   | 53 ++++++++++++++++++++-
 t/solver_git.t                 |  7 +++
 7 files changed, 167 insertions(+), 18 deletions(-)
 create mode 100644 lib/PublicInbox/RepoTree.pm

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] www_stream: coderepo-specific top bar
  2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
  2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
  2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
  2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
  To: meta

It gets nasty when multiple, non-ALL lists point to the same
coderepo, but I guess ALL exists for that.  Only lightly-tested
with various PSGI prefix mounts, but it seems to be working...
---
 lib/PublicInbox/ViewVCS.pm     | 23 ++++++++++-----
 lib/PublicInbox/WwwCoderepo.pm |  9 +-----
 lib/PublicInbox/WwwStream.pm   | 53 +++++++++++++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 6b641b32..7ac719bc 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -156,10 +156,14 @@ sub show_commit_start { # ->psgi_qx callback
 sub ibx_url_for {
 	my ($ctx) = @_;
 	$ctx->{ibx} and return; # fall back to $upfx
-	$ctx->{git} or return;
+	$ctx->{git} or die 'BUG: no {git}';
 	if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
-		return $ALL->base_url // $ALL->base_url($ctx->{env});
-	} elsif (my $ibx_names = $ctx->{git}->{ibx_names}) {
+		if (defined(my $u = $ALL->base_url($ctx->{env}))) {
+			return wantarray ? ($u) : $u;
+		}
+	}
+	my @ret;
+	if (my $ibx_names = $ctx->{git}->{ibx_names}) {
 		my $by_name = $ctx->{www}->{pi_cfg}->{-by_name};
 		for my $name (@$ibx_names) {
 			my $ibx = $by_name->{$name} // do {
@@ -167,12 +171,13 @@ sub ibx_url_for {
 				next;
 			};
 			$ibx->isrch // next;
-			return defined($ibx->{url}) ?
-				prurl($ctx->{env}, $ibx->{url}) :
-				"../../../$name/";
+			my $u = defined($ibx->{url}) ?
+				prurl($ctx->{env}, $ibx->{url}) : $name;
+			$u .= '/' if substr($u, -1) ne '/';
+			push @ret, $u;
 		}
 	}
-	undef;
+	wantarray ? (@ret) : $ret[0];
 }
 
 sub cmt_finalize {
@@ -253,8 +258,10 @@ EOM
 			my $ibx_url = ibx_url_for($ctx);
 			my $alt;
 			if (defined $ibx_url) {
+				$alt = " `$ibx_url'";
+				$ibx_url =~ m!://! or
+					substr($ibx_url, 0, 0, '../../../');
 				$ibx_url = ascii_html($ibx_url);
-				$alt = ' '.$ibx_url;
 			} else {
 				$ibx_url = $upfx;
 				$alt = '';
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index e89a6456..2fba0cd0 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -79,14 +79,7 @@ sub summary_finish {
 		$tip_html .= ' '.ascii_html($tip).' --';
 	}
 	print $zfh <<EOM;
-<pre>
-<a
-href='#readme'>about</a> <a
-href='#heads'>heads</a> <a
-href='#tags'>tags</a>
-
-<a
-id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d'$tip_html
+<pre><a id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d'$tip_html
 EOM
 	for (@r) {
 		my $d; # decorations
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 59edad5d..8c40096a 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -38,9 +38,60 @@ sub async_eml { # for async_blob_cb
 	$ctx->write($ctx->{cb}->($ctx, $eml));
 }
 
+sub html_repo_top ($) {
+	my ($ctx) = @_;
+	my $git = $ctx->{git};
+	my $desc = ascii_html($git->description);
+	my $title = delete($ctx->{-title_html}) // $desc;
+	my $upfx = $ctx->{-upfx} // '';
+	my $atom = $ctx->{-atom} // (substr($upfx, -1) eq '/' ?
+					"${upfx}atom/" : "$upfx/atom/");
+	my $top = ascii_html($git->{nick});
+	$top = qq(<a\nhref="$upfx">$top</a>) if length($upfx);
+	$top .= <<EOM;
+  <a href='$upfx#readme'>about</a> / <a
+href='$upfx#heads'>heads</a> / <a
+href='$upfx#tags'>tags</a>
+<b>$desc</b>
+EOM
+	my @url = PublicInbox::ViewVCS::ibx_url_for($ctx);
+	if (@url) {
+		$ctx->{-has_srch} = 1;
+		my $base_url = base_url($ctx);
+		my ($pfx, $sfx) = ($base_url =~ m!\A(https?://[^/]+/)(.*)\z!i);
+		my $iupfx = '../' x (($sfx =~ tr!/!/!) + 1);
+		$pfx = ascii_html($pfx);
+		$pfx = qr/\A\Q$pfx\E/i;
+		my $tmp = $top;
+		$top = '';
+		my ($s, $u, $same_host);
+		my $q_val = delete($ctx->{-q_value_html}) // '';
+		$q_val = qq(\nvalue="$q_val") if $q_val ne '';
+		for (@url) {
+			$u = $s = ascii_html($_);
+			substr($u, 0, 0, $iupfx) if $u !~ m!://!;
+			$s =~ s!$pfx!!;
+			$s =~ s!/\z!!;
+			$top .= qq{<form\naction="$u"><pre>$tmp} .
+				qq{<input\nname=q type=text$q_val />} .
+				qq{<input type=submit\n} .
+				qq{value="search mail in `$s&#39;"/>} .
+				q{</pre></form>};
+			$tmp = '';
+		}
+	} else {
+		$top = "<pre>$top</pre>";
+	}
+	"<html><head><title>$title</title>" .
+		qq(<link\nrel=alternate\ntitle="Atom feed"\n).
+		qq(href="$atom"\ntype="application/atom+xml"/>) .
+		$ctx->{www}->style($upfx) .
+		'</head><body>'.$top;
+}
+
 sub html_top ($) {
 	my ($ctx) = @_;
-	my $ibx = $ctx->{ibx} // $ctx->{git};
+	my $ibx = $ctx->{ibx} // return html_repo_top($ctx);
 	my $desc = ascii_html($ibx->description);
 	my $title = delete($ctx->{-title_html}) // $desc;
 	my $upfx = $ctx->{-upfx} || '';

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/
  2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
  2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
  2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
  2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
  To: meta

This is for compatibility with cgit to ease migration.
---
 MANIFEST                       |  1 +
 lib/PublicInbox/GitAsyncCat.pm |  2 +-
 lib/PublicInbox/RepoTree.pm    | 44 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WwwCoderepo.pm |  6 +++++
 t/solver_git.t                 |  7 ++++++
 5 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 lib/PublicInbox/RepoTree.pm

diff --git a/MANIFEST b/MANIFEST
index 3626e4d2..c494d6f7 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -311,6 +311,7 @@ lib/PublicInbox/Qspawn.pm
 lib/PublicInbox/Reply.pm
 lib/PublicInbox/RepoAtom.pm
 lib/PublicInbox/RepoSnapshot.pm
+lib/PublicInbox/RepoTree.pm
 lib/PublicInbox/SaPlugin/ListMirror.pm
 lib/PublicInbox/SaPlugin/ListMirror.pod
 lib/PublicInbox/Search.pm
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 2e0725a6..6dda7340 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -75,7 +75,7 @@ sub ibx_async_cat ($$$$) {
 }
 
 sub async_check ($$$$) {
-	my ($ibx, $oidish, $cb, $arg) = @_;
+	my ($ibx, $oidish, $cb, $arg) = @_; # $ibx may be $ctx
 	my $git = $ibx->{git} // $ibx->git;
 	$git->check_async($oidish, $cb, $arg);
 	$git->{async_chk} //= do {
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
new file mode 100644
index 00000000..7f2ff206
--- /dev/null
+++ b/lib/PublicInbox/RepoTree.pm
@@ -0,0 +1,44 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# cgit-compatible $REPO/tree/[PATH]?h=$tip redirector
+package PublicInbox::RepoTree;
+use v5.12;
+use PublicInbox::ViewDiff qw(uri_escape_path);
+use PublicInbox::GitAsyncCat;
+use PublicInbox::WwwStatic qw(r);
+
+sub tree_30x { # git check_async callback
+	my ($oid, $type, $size, $ctx) = @_;
+	my $wcb = delete $ctx->{-wcb};
+	return $wcb->(r(404)) if $type eq 'missing';
+	my $u = $ctx->{git}->base_url($ctx->{env});
+	my $path = uri_escape_path(delete $ctx->{-path});
+	$u .= "$oid/s/?b=$path";
+	$wcb->([ 302, [ Location => $u, 'Content-Type' => 'text/plain' ],
+		[ "Redirecting to $u\n" ] ])
+}
+
+sub srv_tree {
+	my ($ctx, $path) = @_;
+	return if index($path, '//') >= 0 || index($path, '/') == 0;
+	my $tip = $ctx->{qp}->{h} // 'HEAD';
+	$path =~ s!/\z!!;
+	my $obj = $ctx->{-obj} = "$tip:$path";
+	$ctx->{-path} = $path;
+
+	# "\n" breaks with `git cat-file --batch-check', and there's no
+	# legitimate use of "\n" in filenames anyways.
+	return if index($obj, "\n") >= 0;
+	sub {
+		$ctx->{-wcb} = $_[0]; # HTTP::{Chunked,Identity}
+		if ($ctx->{env}->{'pi-httpd.async'}) {
+			async_check($ctx, $obj, \&tree_30x, $ctx);
+		} else {
+			$ctx->{git}->check_async($obj, \&tree_30x, $ctx);
+			$ctx->{git}->async_wait_all;
+		}
+	};
+}
+
+1;
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 2fba0cd0..668b6398 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -18,6 +18,7 @@ use PublicInbox::Hval qw(ascii_html);
 use PublicInbox::ViewDiff qw(uri_escape_path);
 use PublicInbox::RepoSnapshot;
 use PublicInbox::RepoAtom;
+use PublicInbox::RepoTree;
 
 my $EACH_REF = "git for-each-ref --sort=-creatordate --format='%(HEAD)%00".
 	join('%00', map { "%($_)" }
@@ -226,6 +227,11 @@ sub srv { # endpoint called by PublicInbox::WWW
 			($ctx->{git} = $cr->{$1}) and
 		return PublicInbox::ViewVCS::show($ctx, $2);
 
+	if ($path_info =~ m!\A/(.+?)/tree/(.*)\z! and
+			($ctx->{git} = $cr->{$1})) {
+		return PublicInbox::RepoTree::srv_tree($ctx, $2) // r(404);
+	}
+
 	# snapshots:
 	if ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and
 			($ctx->{git} = $cr->{$1})) {
diff --git a/t/solver_git.t b/t/solver_git.t
index 89ed0362..5519fa18 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -381,6 +381,13 @@ EOF
 			$res = $cb->(GET('/public-inbox/atom/README.md'));
 			is($res->code, 404, '404 on non-existent file Atom feed');
 		}
+
+		$res = $cb->(GET('/public-inbox/tree/'));
+		is($res->code, 302, 'got redirect');
+		$res = $cb->(GET('/public-inbox/tree/README'));
+		is($res->code, 302, 'got redirect for regular file');
+		$res = $cb->(GET('/public-inbox/tree/Documentation'));
+		is($res->code, 302, 'got redirect for directory');
 	};
 	test_psgi(sub { $www->call(@_) }, $client);
 	my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] www_coderepo: /tree/ 404s search git history
  2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
  2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
  2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
  2023-01-12 14:19   ` Eric Wong
  2 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
  To: meta

Displaying git trees over the web with pathnames in the URLs
has the unfortunate consequence of URLs getting out-of-date
if files are renamed or deleted from the latest tree.

We can utilize `git log' here to search history and find the
commit which led to the rename or deletion.  Of course, we'll
show a suitable command to the user as well, another small
step towards covertly teaching users the git CLI :>

`git log' is not especially fast, here, but Qspawn limiters can
do their job and renames and deletions aren't too common in most
codebases.
---
 lib/PublicInbox/RepoTree.pm | 42 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
index 7f2ff206..5b502a45 100644
--- a/lib/PublicInbox/RepoTree.pm
+++ b/lib/PublicInbox/RepoTree.pm
@@ -7,11 +7,50 @@ use v5.12;
 use PublicInbox::ViewDiff qw(uri_escape_path);
 use PublicInbox::GitAsyncCat;
 use PublicInbox::WwwStatic qw(r);
+use PublicInbox::Qspawn;
+use PublicInbox::WwwStream qw(html_oneshot);
+use PublicInbox::Hval qw(ascii_html);
+
+sub rd_404_log {
+	my ($bref, $ctx) = @_;
+	my $path = $ctx->{-q_value_html} = ascii_html($ctx->{-path});
+	my $tip = 'HEAD';
+	$tip = ascii_html($ctx->{qp}->{h}) if defined($ctx->{qp}->{h});
+	PublicInbox::WwwStream::html_init($ctx);
+	my $zfh = $ctx->{zfh};
+	print $zfh "<pre>\$ git log -1 $tip -- $path\n";
+	if ($$bref eq '') {
+		say $zfh "found no record of `$path' in git history";
+		$ctx->{-has_srch} and
+			say $zfh 'perhaps try searching mail (above)';
+	} else {
+		my ($H, $h, $s_as) = split(/ /, $$bref, 3);
+		utf8::decode($s_as);
+		my $x = uri_escape_path($ctx->{-path});
+		$s_as = ascii_html($s_as);
+		print $zfh <<EOM;
+found last record of `$path' in the following commit:
+<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
+EOM
+	}
+	delete($ctx->{-wcb})->($ctx->html_done);
+}
+
+sub find_missing {
+	my ($ctx) = @_;
+	my $cmd = ['git', "--git-dir=$ctx->{git}->{git_dir}",
+		qw(log --no-color -1), '--pretty=%H %h %s (%as)' ];
+	push @$cmd, $ctx->{qp}->{h} if defined($ctx->{qp}->{h});
+	push @$cmd, '--';
+	push @$cmd, $ctx->{-path} if $ctx->{-path} ne '';
+	my $qsp = PublicInbox::Qspawn->new($cmd);
+	$qsp->psgi_qx($ctx->{env}, undef, \&rd_404_log, $ctx);
+}
 
 sub tree_30x { # git check_async callback
 	my ($oid, $type, $size, $ctx) = @_;
+	return find_missing($ctx) if $type eq 'missing';
 	my $wcb = delete $ctx->{-wcb};
-	return $wcb->(r(404)) if $type eq 'missing';
 	my $u = $ctx->{git}->base_url($ctx->{env});
 	my $path = uri_escape_path(delete $ctx->{-path});
 	$u .= "$oid/s/?b=$path";
@@ -23,6 +62,7 @@ sub srv_tree {
 	my ($ctx, $path) = @_;
 	return if index($path, '//') >= 0 || index($path, '/') == 0;
 	my $tip = $ctx->{qp}->{h} // 'HEAD';
+	$ctx->{-upfx} = '../' x (($path =~ tr!/!/!) + 1);
 	$path =~ s!/\z!!;
 	my $obj = $ctx->{-obj} = "$tip:$path";
 	$ctx->{-path} = $path;

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] www_coderepo: /tree/ 404s search git history
  2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
@ 2023-01-12 14:19   ` Eric Wong
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:19 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> +		print $zfh <<EOM;
> +found last record of `$path' in the following commit:
> +<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
> +EOM

I think an extra newline works better, here:

diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
index 5b502a45..cec71eb6 100644
--- a/lib/PublicInbox/RepoTree.pm
+++ b/lib/PublicInbox/RepoTree.pm
@@ -30,6 +30,7 @@ sub rd_404_log {
 		$s_as = ascii_html($s_as);
 		print $zfh <<EOM;
 found last record of `$path' in the following commit:
+
 <a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
 EOM
 	}

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-01-12 14:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
2023-01-12 14:19   ` Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).