* [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect
@ 2023-01-12 14:14 Eric Wong
2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
To: meta
I much prefer $REPO/$OID/s/ URLs since they're stable, but we'll
need to handle /tree/ when migrating from cgit.
3/3 adds some git-aware 404 handling for deleted||renamed files;
hopefully it's not too expensive. It might be a first...
For example, a recent deletion:
https://80x24.org/lore/pub/scm/git/git.git/tree/git-bisect.sh
vs. one from 2005:
https://80x24.org/lore/pub/scm/git/git.git/tree/Documentation/git-mkdelta.txt
Eric Wong (3):
www_stream: coderepo-specific top bar
www_coderepo: /tree/ redirects to /$OID/s/
www_coderepo: /tree/ 404s search git history
MANIFEST | 1 +
lib/PublicInbox/GitAsyncCat.pm | 2 +-
lib/PublicInbox/RepoTree.pm | 84 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/ViewVCS.pm | 23 ++++++----
lib/PublicInbox/WwwCoderepo.pm | 15 +++---
lib/PublicInbox/WwwStream.pm | 53 ++++++++++++++++++++-
t/solver_git.t | 7 +++
7 files changed, 167 insertions(+), 18 deletions(-)
create mode 100644 lib/PublicInbox/RepoTree.pm
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/3] www_stream: coderepo-specific top bar
2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
To: meta
It gets nasty when multiple, non-ALL lists point to the same
coderepo, but I guess ALL exists for that. Only lightly-tested
with various PSGI prefix mounts, but it seems to be working...
---
lib/PublicInbox/ViewVCS.pm | 23 ++++++++++-----
lib/PublicInbox/WwwCoderepo.pm | 9 +-----
lib/PublicInbox/WwwStream.pm | 53 +++++++++++++++++++++++++++++++++-
3 files changed, 68 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm
index 6b641b32..7ac719bc 100644
--- a/lib/PublicInbox/ViewVCS.pm
+++ b/lib/PublicInbox/ViewVCS.pm
@@ -156,10 +156,14 @@ sub show_commit_start { # ->psgi_qx callback
sub ibx_url_for {
my ($ctx) = @_;
$ctx->{ibx} and return; # fall back to $upfx
- $ctx->{git} or return;
+ $ctx->{git} or die 'BUG: no {git}';
if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) {
- return $ALL->base_url // $ALL->base_url($ctx->{env});
- } elsif (my $ibx_names = $ctx->{git}->{ibx_names}) {
+ if (defined(my $u = $ALL->base_url($ctx->{env}))) {
+ return wantarray ? ($u) : $u;
+ }
+ }
+ my @ret;
+ if (my $ibx_names = $ctx->{git}->{ibx_names}) {
my $by_name = $ctx->{www}->{pi_cfg}->{-by_name};
for my $name (@$ibx_names) {
my $ibx = $by_name->{$name} // do {
@@ -167,12 +171,13 @@ sub ibx_url_for {
next;
};
$ibx->isrch // next;
- return defined($ibx->{url}) ?
- prurl($ctx->{env}, $ibx->{url}) :
- "../../../$name/";
+ my $u = defined($ibx->{url}) ?
+ prurl($ctx->{env}, $ibx->{url}) : $name;
+ $u .= '/' if substr($u, -1) ne '/';
+ push @ret, $u;
}
}
- undef;
+ wantarray ? (@ret) : $ret[0];
}
sub cmt_finalize {
@@ -253,8 +258,10 @@ EOM
my $ibx_url = ibx_url_for($ctx);
my $alt;
if (defined $ibx_url) {
+ $alt = " `$ibx_url'";
+ $ibx_url =~ m!://! or
+ substr($ibx_url, 0, 0, '../../../');
$ibx_url = ascii_html($ibx_url);
- $alt = ' '.$ibx_url;
} else {
$ibx_url = $upfx;
$alt = '';
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index e89a6456..2fba0cd0 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -79,14 +79,7 @@ sub summary_finish {
$tip_html .= ' '.ascii_html($tip).' --';
}
print $zfh <<EOM;
-<pre>
-<a
-href='#readme'>about</a> <a
-href='#heads'>heads</a> <a
-href='#tags'>tags</a>
-
-<a
-id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d'$tip_html
+<pre><a id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d'$tip_html
EOM
for (@r) {
my $d; # decorations
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 59edad5d..8c40096a 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -38,9 +38,60 @@ sub async_eml { # for async_blob_cb
$ctx->write($ctx->{cb}->($ctx, $eml));
}
+sub html_repo_top ($) {
+ my ($ctx) = @_;
+ my $git = $ctx->{git};
+ my $desc = ascii_html($git->description);
+ my $title = delete($ctx->{-title_html}) // $desc;
+ my $upfx = $ctx->{-upfx} // '';
+ my $atom = $ctx->{-atom} // (substr($upfx, -1) eq '/' ?
+ "${upfx}atom/" : "$upfx/atom/");
+ my $top = ascii_html($git->{nick});
+ $top = qq(<a\nhref="$upfx">$top</a>) if length($upfx);
+ $top .= <<EOM;
+ <a href='$upfx#readme'>about</a> / <a
+href='$upfx#heads'>heads</a> / <a
+href='$upfx#tags'>tags</a>
+<b>$desc</b>
+EOM
+ my @url = PublicInbox::ViewVCS::ibx_url_for($ctx);
+ if (@url) {
+ $ctx->{-has_srch} = 1;
+ my $base_url = base_url($ctx);
+ my ($pfx, $sfx) = ($base_url =~ m!\A(https?://[^/]+/)(.*)\z!i);
+ my $iupfx = '../' x (($sfx =~ tr!/!/!) + 1);
+ $pfx = ascii_html($pfx);
+ $pfx = qr/\A\Q$pfx\E/i;
+ my $tmp = $top;
+ $top = '';
+ my ($s, $u, $same_host);
+ my $q_val = delete($ctx->{-q_value_html}) // '';
+ $q_val = qq(\nvalue="$q_val") if $q_val ne '';
+ for (@url) {
+ $u = $s = ascii_html($_);
+ substr($u, 0, 0, $iupfx) if $u !~ m!://!;
+ $s =~ s!$pfx!!;
+ $s =~ s!/\z!!;
+ $top .= qq{<form\naction="$u"><pre>$tmp} .
+ qq{<input\nname=q type=text$q_val />} .
+ qq{<input type=submit\n} .
+ qq{value="search mail in `$s'"/>} .
+ q{</pre></form>};
+ $tmp = '';
+ }
+ } else {
+ $top = "<pre>$top</pre>";
+ }
+ "<html><head><title>$title</title>" .
+ qq(<link\nrel=alternate\ntitle="Atom feed"\n).
+ qq(href="$atom"\ntype="application/atom+xml"/>) .
+ $ctx->{www}->style($upfx) .
+ '</head><body>'.$top;
+}
+
sub html_top ($) {
my ($ctx) = @_;
- my $ibx = $ctx->{ibx} // $ctx->{git};
+ my $ibx = $ctx->{ibx} // return html_repo_top($ctx);
my $desc = ascii_html($ibx->description);
my $title = delete($ctx->{-title_html}) // $desc;
my $upfx = $ctx->{-upfx} || '';
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/
2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
2 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
To: meta
This is for compatibility with cgit to ease migration.
---
MANIFEST | 1 +
lib/PublicInbox/GitAsyncCat.pm | 2 +-
lib/PublicInbox/RepoTree.pm | 44 ++++++++++++++++++++++++++++++++++
lib/PublicInbox/WwwCoderepo.pm | 6 +++++
t/solver_git.t | 7 ++++++
5 files changed, 59 insertions(+), 1 deletion(-)
create mode 100644 lib/PublicInbox/RepoTree.pm
diff --git a/MANIFEST b/MANIFEST
index 3626e4d2..c494d6f7 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -311,6 +311,7 @@ lib/PublicInbox/Qspawn.pm
lib/PublicInbox/Reply.pm
lib/PublicInbox/RepoAtom.pm
lib/PublicInbox/RepoSnapshot.pm
+lib/PublicInbox/RepoTree.pm
lib/PublicInbox/SaPlugin/ListMirror.pm
lib/PublicInbox/SaPlugin/ListMirror.pod
lib/PublicInbox/Search.pm
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index 2e0725a6..6dda7340 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -75,7 +75,7 @@ sub ibx_async_cat ($$$$) {
}
sub async_check ($$$$) {
- my ($ibx, $oidish, $cb, $arg) = @_;
+ my ($ibx, $oidish, $cb, $arg) = @_; # $ibx may be $ctx
my $git = $ibx->{git} // $ibx->git;
$git->check_async($oidish, $cb, $arg);
$git->{async_chk} //= do {
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
new file mode 100644
index 00000000..7f2ff206
--- /dev/null
+++ b/lib/PublicInbox/RepoTree.pm
@@ -0,0 +1,44 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# cgit-compatible $REPO/tree/[PATH]?h=$tip redirector
+package PublicInbox::RepoTree;
+use v5.12;
+use PublicInbox::ViewDiff qw(uri_escape_path);
+use PublicInbox::GitAsyncCat;
+use PublicInbox::WwwStatic qw(r);
+
+sub tree_30x { # git check_async callback
+ my ($oid, $type, $size, $ctx) = @_;
+ my $wcb = delete $ctx->{-wcb};
+ return $wcb->(r(404)) if $type eq 'missing';
+ my $u = $ctx->{git}->base_url($ctx->{env});
+ my $path = uri_escape_path(delete $ctx->{-path});
+ $u .= "$oid/s/?b=$path";
+ $wcb->([ 302, [ Location => $u, 'Content-Type' => 'text/plain' ],
+ [ "Redirecting to $u\n" ] ])
+}
+
+sub srv_tree {
+ my ($ctx, $path) = @_;
+ return if index($path, '//') >= 0 || index($path, '/') == 0;
+ my $tip = $ctx->{qp}->{h} // 'HEAD';
+ $path =~ s!/\z!!;
+ my $obj = $ctx->{-obj} = "$tip:$path";
+ $ctx->{-path} = $path;
+
+ # "\n" breaks with `git cat-file --batch-check', and there's no
+ # legitimate use of "\n" in filenames anyways.
+ return if index($obj, "\n") >= 0;
+ sub {
+ $ctx->{-wcb} = $_[0]; # HTTP::{Chunked,Identity}
+ if ($ctx->{env}->{'pi-httpd.async'}) {
+ async_check($ctx, $obj, \&tree_30x, $ctx);
+ } else {
+ $ctx->{git}->check_async($obj, \&tree_30x, $ctx);
+ $ctx->{git}->async_wait_all;
+ }
+ };
+}
+
+1;
diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm
index 2fba0cd0..668b6398 100644
--- a/lib/PublicInbox/WwwCoderepo.pm
+++ b/lib/PublicInbox/WwwCoderepo.pm
@@ -18,6 +18,7 @@ use PublicInbox::Hval qw(ascii_html);
use PublicInbox::ViewDiff qw(uri_escape_path);
use PublicInbox::RepoSnapshot;
use PublicInbox::RepoAtom;
+use PublicInbox::RepoTree;
my $EACH_REF = "git for-each-ref --sort=-creatordate --format='%(HEAD)%00".
join('%00', map { "%($_)" }
@@ -226,6 +227,11 @@ sub srv { # endpoint called by PublicInbox::WWW
($ctx->{git} = $cr->{$1}) and
return PublicInbox::ViewVCS::show($ctx, $2);
+ if ($path_info =~ m!\A/(.+?)/tree/(.*)\z! and
+ ($ctx->{git} = $cr->{$1})) {
+ return PublicInbox::RepoTree::srv_tree($ctx, $2) // r(404);
+ }
+
# snapshots:
if ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and
($ctx->{git} = $cr->{$1})) {
diff --git a/t/solver_git.t b/t/solver_git.t
index 89ed0362..5519fa18 100644
--- a/t/solver_git.t
+++ b/t/solver_git.t
@@ -381,6 +381,13 @@ EOF
$res = $cb->(GET('/public-inbox/atom/README.md'));
is($res->code, 404, '404 on non-existent file Atom feed');
}
+
+ $res = $cb->(GET('/public-inbox/tree/'));
+ is($res->code, 302, 'got redirect');
+ $res = $cb->(GET('/public-inbox/tree/README'));
+ is($res->code, 302, 'got redirect for regular file');
+ $res = $cb->(GET('/public-inbox/tree/Documentation'));
+ is($res->code, 302, 'got redirect for directory');
};
test_psgi(sub { $www->call(@_) }, $client);
my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] www_coderepo: /tree/ 404s search git history
2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
@ 2023-01-12 14:14 ` Eric Wong
2023-01-12 14:19 ` Eric Wong
2 siblings, 1 reply; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:14 UTC (permalink / raw)
To: meta
Displaying git trees over the web with pathnames in the URLs
has the unfortunate consequence of URLs getting out-of-date
if files are renamed or deleted from the latest tree.
We can utilize `git log' here to search history and find the
commit which led to the rename or deletion. Of course, we'll
show a suitable command to the user as well, another small
step towards covertly teaching users the git CLI :>
`git log' is not especially fast, here, but Qspawn limiters can
do their job and renames and deletions aren't too common in most
codebases.
---
lib/PublicInbox/RepoTree.pm | 42 ++++++++++++++++++++++++++++++++++++-
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
index 7f2ff206..5b502a45 100644
--- a/lib/PublicInbox/RepoTree.pm
+++ b/lib/PublicInbox/RepoTree.pm
@@ -7,11 +7,50 @@ use v5.12;
use PublicInbox::ViewDiff qw(uri_escape_path);
use PublicInbox::GitAsyncCat;
use PublicInbox::WwwStatic qw(r);
+use PublicInbox::Qspawn;
+use PublicInbox::WwwStream qw(html_oneshot);
+use PublicInbox::Hval qw(ascii_html);
+
+sub rd_404_log {
+ my ($bref, $ctx) = @_;
+ my $path = $ctx->{-q_value_html} = ascii_html($ctx->{-path});
+ my $tip = 'HEAD';
+ $tip = ascii_html($ctx->{qp}->{h}) if defined($ctx->{qp}->{h});
+ PublicInbox::WwwStream::html_init($ctx);
+ my $zfh = $ctx->{zfh};
+ print $zfh "<pre>\$ git log -1 $tip -- $path\n";
+ if ($$bref eq '') {
+ say $zfh "found no record of `$path' in git history";
+ $ctx->{-has_srch} and
+ say $zfh 'perhaps try searching mail (above)';
+ } else {
+ my ($H, $h, $s_as) = split(/ /, $$bref, 3);
+ utf8::decode($s_as);
+ my $x = uri_escape_path($ctx->{-path});
+ $s_as = ascii_html($s_as);
+ print $zfh <<EOM;
+found last record of `$path' in the following commit:
+<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
+EOM
+ }
+ delete($ctx->{-wcb})->($ctx->html_done);
+}
+
+sub find_missing {
+ my ($ctx) = @_;
+ my $cmd = ['git', "--git-dir=$ctx->{git}->{git_dir}",
+ qw(log --no-color -1), '--pretty=%H %h %s (%as)' ];
+ push @$cmd, $ctx->{qp}->{h} if defined($ctx->{qp}->{h});
+ push @$cmd, '--';
+ push @$cmd, $ctx->{-path} if $ctx->{-path} ne '';
+ my $qsp = PublicInbox::Qspawn->new($cmd);
+ $qsp->psgi_qx($ctx->{env}, undef, \&rd_404_log, $ctx);
+}
sub tree_30x { # git check_async callback
my ($oid, $type, $size, $ctx) = @_;
+ return find_missing($ctx) if $type eq 'missing';
my $wcb = delete $ctx->{-wcb};
- return $wcb->(r(404)) if $type eq 'missing';
my $u = $ctx->{git}->base_url($ctx->{env});
my $path = uri_escape_path(delete $ctx->{-path});
$u .= "$oid/s/?b=$path";
@@ -23,6 +62,7 @@ sub srv_tree {
my ($ctx, $path) = @_;
return if index($path, '//') >= 0 || index($path, '/') == 0;
my $tip = $ctx->{qp}->{h} // 'HEAD';
+ $ctx->{-upfx} = '../' x (($path =~ tr!/!/!) + 1);
$path =~ s!/\z!!;
my $obj = $ctx->{-obj} = "$tip:$path";
$ctx->{-path} = $path;
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] www_coderepo: /tree/ 404s search git history
2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
@ 2023-01-12 14:19 ` Eric Wong
0 siblings, 0 replies; 5+ messages in thread
From: Eric Wong @ 2023-01-12 14:19 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> + print $zfh <<EOM;
> +found last record of `$path' in the following commit:
> +<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
> +EOM
I think an extra newline works better, here:
diff --git a/lib/PublicInbox/RepoTree.pm b/lib/PublicInbox/RepoTree.pm
index 5b502a45..cec71eb6 100644
--- a/lib/PublicInbox/RepoTree.pm
+++ b/lib/PublicInbox/RepoTree.pm
@@ -30,6 +30,7 @@ sub rd_404_log {
$s_as = ascii_html($s_as);
print $zfh <<EOM;
found last record of `$path' in the following commit:
+
<a href="$ctx->{-upfx}$H/s/?b=$x">$h</a> $s_as
EOM
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-01-12 14:19 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-12 14:14 [PATCH 0/3] coderepo: cgit-compatible /tree/ redirect Eric Wong
2023-01-12 14:14 ` [PATCH 1/3] www_stream: coderepo-specific top bar Eric Wong
2023-01-12 14:14 ` [PATCH 2/3] www_coderepo: /tree/ redirects to /$OID/s/ Eric Wong
2023-01-12 14:14 ` [PATCH 3/3] www_coderepo: /tree/ 404s search git history Eric Wong
2023-01-12 14:19 ` Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).