This is to eventually support M:N inbox:coderepo search via -extindex. For now, it's a JS-free and CSS-optional cgit-like UI, but perhaps more brutalist :> It's mainly for wiring /$INBOX/$OID/s/ into /$CODEREPO/$OID/s/, but Atom feeds and such will be supported. It'll work better with coderepos with a ".git" name suffix to avoid conflicts with the inbox name. So [coderepo "git"] becomes [coderepo "git.git"] in my config file. cgitrc usage should continue working, I think... It looks better out-of-the-box on w3m since cgit relies too much on CSS :> Example here: https://80x24.org/lore/git.git Eric Wong (10): tests: use test_httpd consistently cgit: use Perl 5.10-isms, optimize, and golf git: hoist out description git: move cloneurl + description reading here www_coderepo: an alternative to cgit www_coderepo: wire up /$CODEREPO/$OID/s/ endpoint git: allow ->local_nick to return undef www_coderepo: wire up snapshot support www_stream: use git->pub_urls for coderepo links www_coderepo: start a top nav bar in summary view MANIFEST | 2 + lib/PublicInbox/Cgit.pm | 33 ++--- lib/PublicInbox/Config.pm | 2 +- lib/PublicInbox/ExtSearch.pm | 2 +- lib/PublicInbox/Git.pm | 53 +++++++-- lib/PublicInbox/GitAsyncCat.pm | 66 ++++++++-- lib/PublicInbox/Inbox.pm | 23 +--- lib/PublicInbox/RepoSnapshot.pm | 95 +++++++++++++++ lib/PublicInbox/SolverGit.pm | 8 +- lib/PublicInbox/TestCommon.pm | 14 ++- lib/PublicInbox/ViewVCS.pm | 1 + lib/PublicInbox/WWW.pm | 12 +- lib/PublicInbox/WwwCoderepo.pm | 205 ++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwStream.pm | 33 ++--- t/init.t | 2 +- t/lei-mirror.t | 10 +- t/psgi_attach.t | 13 +- t/solver_git.t | 70 ++++++++--- t/www_altid.t | 13 +- xt/solver.t | 18 +-- 20 files changed, 533 insertions(+), 142 deletions(-) create mode 100644 lib/PublicInbox/RepoSnapshot.pm create mode 100644 lib/PublicInbox/WwwCoderepo.pm
This allows us to consolidate our checks for Plack::Test::ExternalServer and enforce our redirect-disabled LWP::UserAgent. --- lib/PublicInbox/TestCommon.pm | 14 +++++++++----- t/psgi_attach.t | 13 +++---------- t/solver_git.t | 17 ++++------------- t/www_altid.t | 13 +++---------- xt/solver.t | 18 ++++-------------- 5 files changed, 23 insertions(+), 52 deletions(-) diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index 333791b4..abf4f364 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -743,11 +743,14 @@ sub create_inbox ($$;@) { $ibx; } -sub test_httpd ($$;$) { - my ($env, $client, $skip) = @_; - for (qw(PI_CONFIG TMPDIR)) { - $env->{$_} or BAIL_OUT "$_ unset"; - } +sub test_httpd ($$;$$) { + my ($env, $client, $skip, $cb) = @_; + my ($tmpdir, $for_destroy); + $env->{TMPDIR} //= do { + ($tmpdir, $for_destroy) = tmpdir(); + $tmpdir; + }; + for (qw(PI_CONFIG)) { $env->{$_} or BAIL_OUT "$_ unset" } SKIP: { require_mods(qw(Plack::Test::ExternalServer LWP::UserAgent), $skip // 1); @@ -761,6 +764,7 @@ sub test_httpd ($$;$) { $ua->max_redirect(0); Plack::Test::ExternalServer::test_psgi(client => $client, ua => $ua); + $cb->() if $cb; $td->join('TERM'); open my $fh, '<', $err or BAIL_OUT $!; my $e = do { local $/; <$fh> }; diff --git a/t/psgi_attach.t b/t/psgi_attach.t index 79665d6f..db551696 100644 --- a/t/psgi_attach.t +++ b/t/psgi_attach.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; @@ -97,19 +97,12 @@ my $client = sub { test_psgi(sub { $www->call(@_) }, $client); SKIP: { - require_mods(qw(DBD::SQLite Plack::Test::ExternalServer), 18); + require_mods(qw(DBD::SQLite), 18); $ibx = create_inbox 'test-indexed', indexlevel => 'basic', $creat_cb; $cfgpath = "$ibx->{inboxdir}/pi_config"; my $env = { PI_CONFIG => $cfgpath 
}; $www = PublicInbox::WWW->new(PublicInbox::Config->new($cfgpath)); test_psgi(sub { $www->call(@_) }, $client); - my $sock = tcp_server() or die; - my ($tmpdir, $for_destroy) = tmpdir(); - my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); - my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; - my $td = start_script($cmd, $env, { 3 => $sock }); - my ($h, $p) = tcp_host_port($sock); - local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; - Plack::Test::ExternalServer::test_psgi(client => $client); + test_httpd($env, $client); } done_testing; diff --git a/t/solver_git.t b/t/solver_git.t index 958af065..e347c711 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -302,29 +302,20 @@ EOF 'UTF-8 commit shown properly'); }; test_psgi(sub { $www->call(@_) }, $client); + my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir }; + test_httpd($env, $client, 7, sub { SKIP: { - require_mods(qw(Plack::Test::ExternalServer), 7); - my $env = { PI_CONFIG => $cfgpath }; - my $sock = tcp_server() or die; - my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); - my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; - my $td = start_script($cmd, $env, { 3 => $sock }); - my ($h, $p) = tcp_host_port($sock); - my $url = "http://$h:$p"; - local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = $url; - Plack::Test::ExternalServer::test_psgi(client => $client); require_cmd('curl', 1) or skip 'no curl', 1; - mkdir "$tmpdir/ext" // xbail "mkdir $!"; + my $rurl = "$ENV{PLACK_TEST_EXTERNALSERVER_URI}/$name"; test_lei({tmpdir => "$tmpdir/ext"}, sub { - my $rurl = "$url/$name"; lei_ok(qw(blob --no-mail 69df7d5 -I), $rurl); is(git_sha(1, \$lei_out)->hexdigest, $expect, 'blob contents output'); ok(!lei(qw(blob -I), $rurl, $non_existent), 'non-existent blob fails'); }); - } + }}); } done_testing(); diff --git a/t/www_altid.t b/t/www_altid.t index 94a2e807..de1e6ed6 100644 --- a/t/www_altid.t +++ b/t/www_altid.t @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2020-2021 all 
contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Config; @@ -59,14 +59,7 @@ my $client = sub { }; test_psgi(sub { $www->call(@_) }, $client); SKIP: { - require_mods(qw(Plack::Test::ExternalServer), 4); - my $env = { PI_CONFIG => $cfgpath }; - my $sock = tcp_server() or die; - my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); - my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; - my $td = start_script($cmd, $env, { 3 => $sock }); - my ($h, $p) = tcp_host_port($sock); - local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; - Plack::Test::ExternalServer::test_psgi(client => $client); + my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir }; + test_httpd($env, $client); } done_testing; diff --git a/xt/solver.t b/xt/solver.t index 32cd43cf..c76e0b0a 100644 --- a/xt/solver.t +++ b/xt/solver.t @@ -57,20 +57,10 @@ while (($ibx_name, $urls) = each %$todo) { } } -SKIP: { - require_mods(qw(Plack::Test::ExternalServer), $nr); - delete @$todo{@gone}; - - my $sock = tcp_server() or BAIL_OUT $!; - my ($tmpdir, $for_destroy) = tmpdir(); - my ($out, $err) = map { "$tmpdir/std$_.log" } qw(out err); - my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; - my $td = start_script($cmd, undef, { 3 => $sock }); - my ($h, $p) = tcp_host_port($sock); - local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; - while (($ibx_name, $urls) = each %$todo) { - Plack::Test::ExternalServer::test_psgi(client => $client); - } +delete @$todo{@gone}; +my $env = { PI_CONFIG => PublicInbox::Config->default_file }; +while (($ibx_name, $urls) = each %$todo) { + test_httpd($env, $client, $nr); } done_testing();
We can reduce variable assignments in a few places and filter keys more quickly using the `grep' Perl op rather than relying on `m// or next' inside a loop. Similar changes to the NNTP and IMAP code (e.g. b700fce60f25038e (nntp: NEWNEWS: speed up filtering, 2020-11-27)) yielded good improvements. --- lib/PublicInbox/Cgit.pm | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index cc729aa2..a63f8902 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # wrapper for cgit(1) and git-http-backend(1) for browsing and @@ -6,7 +6,7 @@ # directive to be set in the public-inbox config file. package PublicInbox::Cgit; -use strict; +use v5.12; use PublicInbox::GitHTTPBackend; use PublicInbox::Git; # not bothering with Exporter for a one-off @@ -40,10 +40,9 @@ sub locate_cgit ($) { if (defined($cgit_bin) && $cgit_bin =~ m!\A(.+?)/[^/]+\z!) 
{ unshift @dirs, $1 if -d $1; } - foreach my $d (@dirs) { - my $f = "$d/cgit.css"; - next unless -f $f; - $cgit_data = $d; + for (@dirs) { + next unless -f "$_/cgit.css"; + $cgit_data = $_; last; } } @@ -65,17 +64,15 @@ sub new { # some cgit repos may not be mapped to inboxes, so ensure those exist: my $code_repos = $pi_cfg->{-code_repos}; - foreach my $k (keys %$pi_cfg) { - $k =~ /\Acoderepo\.(.+)\.dir\z/ or next; - my $dir = $pi_cfg->{$k}; - $code_repos->{$1} ||= $pi_cfg->fill_code_repo($1); + for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) { + $k = substr($k, length('coderepo.'), -length('.dir')); + $code_repos->{$k} //= $pi_cfg->fill_code_repo($k); } while (my ($nick, $repo) = each %$code_repos) { $self->{"\0$nick"} = $repo; } - my $cgit_static = $pi_cfg->{-cgit_static}; - my $static = join('|', map { quotemeta $_ } keys %$cgit_static); - $self->{static} = qr/\A($static)\z/; + my $s = join('|', map { quotemeta } keys %{$pi_cfg->{-cgit_static}}); + $self->{static} = qr/\A($s)\z/; $self; } @@ -114,7 +111,7 @@ sub call { my $cgi_env = { PATH_INFO => $path_info }; foreach (@PASS_ENV) { - defined(my $v = $env->{$_}) or next; + my $v = $env->{$_} // next; $cgi_env->{$_} = $v; } $cgi_env->{'HTTPS'} = 'on' if $env->{'psgi.url_scheme'} eq 'https';
We'll be using the repository description lookup separately, elsewhere. --- lib/PublicInbox/Git.pm | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 9140caea..78b47096 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -498,6 +498,15 @@ sub modified ($) { (split(/ /, <$fh> // time))[0] + 0; # integerize for JSON } +sub description { + my $desc = ''; + if (open(my $fh, '<:utf8', "$_[0]->{git_dir}/description")) { + local $/ = "\n"; + chomp($desc = <$fh> // ''); + } + $desc eq '' ? 'Unnamed repository' : $desc; +} + # for grokmirror, which doesn't read gitweb.description # templates/hooks--update.sample and git-multimail in git.git # only match "Unnamed repository", not the full contents of @@ -520,14 +529,8 @@ sub manifest_entry { chomp(my $owner = $self->qx('config', 'gitweb.owner')); utf8::decode($owner); $ent->{owner} = $owner eq '' ? undef : $owner; - my $desc = ''; - if (open($fh, '<', "$git_dir/description")) { - local $/ = "\n"; - chomp($desc = <$fh>); - utf8::decode($desc); - } - $desc = 'Unnamed repository' if $desc eq ''; - if (defined $epoch && $desc =~ /\AUnnamed repository/) { + my $desc = description($self); + if (defined $epoch && index($desc, 'Unnamed repository') == 0) { $desc = "$default_desc [epoch $epoch]"; } $ent->{description} = $desc;
We'll be using these functions for serving coderepos natively without cgit. --- lib/PublicInbox/ExtSearch.pm | 2 +- lib/PublicInbox/Git.pm | 30 ++++++++++++++++++++++++------ lib/PublicInbox/Inbox.pm | 23 ++++------------------- t/init.t | 2 +- t/lei-mirror.t | 10 +++++----- 5 files changed, 35 insertions(+), 32 deletions(-) diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index a69c0e76..fa49a1d0 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -108,7 +108,7 @@ sub altid_map { {} } sub description { my ($self) = @_; ($self->{description} //= - PublicInbox::Inbox::cat_desc("$self->{topdir}/description")) // + PublicInbox::Git::cat_desc("$self->{topdir}/description")) // '$EXTINDEX_DIR/description missing'; } diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 78b47096..2f0bb6a0 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -498,13 +498,31 @@ sub modified ($) { (split(/ /, <$fh> // time))[0] + 0; # integerize for JSON } +sub try_cat { + my ($path) = @_; + open(my $fh, '<', $path) or return ''; + local $/; + <$fh> // ''; +} + +sub cat_desc ($) { + my $desc = try_cat($_[0]); + chomp $desc; + utf8::decode($desc); + $desc =~ s/\s+/ /smg; + $desc eq '' ? undef : $desc; +} + sub description { - my $desc = ''; - if (open(my $fh, '<:utf8', "$_[0]->{git_dir}/description")) { - local $/ = "\n"; - chomp($desc = <$fh> // ''); - } - $desc eq '' ? 'Unnamed repository' : $desc; + cat_desc("$_[0]->{git_dir}/description") // 'Unnamed repository'; +} + +sub cloneurl { + my ($self) = @_; + $self->{cloneurl} // do { + my @urls = split(/\s+/s, try_cat("$self->{git_dir}/cloneurl")); + scalar(@urls) ? 
($self->{cloneurl} = \@urls) : undef; + } // []; } # for grokmirror, which doesn't read gitweb.description diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 8ac7eb30..3532bb58 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -181,33 +181,18 @@ sub over { } // ($req ? croak("E: $@") : undef); } -sub try_cat { - my ($path) = @_; - open(my $fh, '<', $path) or return ''; - local $/; - <$fh> // ''; -} - -sub cat_desc ($) { - my $desc = try_cat($_[0]); - local $/ = "\n"; - chomp $desc; - utf8::decode($desc); - $desc =~ s/\s+/ /smg; - $desc eq '' ? undef : $desc; -} - sub description { my ($self) = @_; - ($self->{description} //= cat_desc("$self->{inboxdir}/description")) // + ($self->{description} //= + PublicInbox::Git::cat_desc("$self->{inboxdir}/description")) // '($INBOX_DIR/description missing)'; } sub cloneurl { my ($self) = @_; $self->{cloneurl} // do { - my $s = try_cat("$self->{inboxdir}/cloneurl"); - my @urls = split(/\s+/s, $s); + my @urls = split(/\s+/s, + PublicInbox::Git::try_cat("$self->{inboxdir}/cloneurl")); scalar(@urls) ? 
($self->{cloneurl} = \@urls) : undef; } // []; } diff --git a/t/init.t b/t/init.t index 6f4c9dce..460c83f3 100644 --- a/t/init.t +++ b/t/init.t @@ -102,7 +102,7 @@ sub quiet_fail { umask($umask) // xbail "umask: $!"; ok(-d "$tmpdir/a/b/c/d", 'directory created'); my $desc = "$tmpdir/a/b/c/d/description"; - is(PublicInbox::Inbox::try_cat($desc), + is(PublicInbox::Git::try_cat($desc), "public inbox for abcd\@example.com\n", 'description set'); my $mode = (stat($desc))[2]; is(sprintf('0%03o', $mode & 0777), '0644', diff --git a/t/lei-mirror.t b/t/lei-mirror.t index 32a5b039..c172483b 100644 --- a/t/lei-mirror.t +++ b/t/lei-mirror.t @@ -22,7 +22,7 @@ test_lei({ tmpdir => $tmpdir }, sub { lei_ok('add-external', $t1, '--mirror', "$http/t1/", \'--mirror v1'); my $mm_dup = "$t1/public-inbox/msgmap.sqlite3"; ok(-f $mm_dup, 't1-mirror indexed'); - is(PublicInbox::Inbox::try_cat("$t1/description"), + is(PublicInbox::Git::try_cat("$t1/description"), "mirror of $http/t1/\n", 'description set'); ok(-f "$t1/Makefile", 'convenience Makefile added (v1)'); ok(-f "$t1/inbox.config.example", 'inbox.config.example downloaded'); @@ -43,7 +43,7 @@ test_lei({ tmpdir => $tmpdir }, sub { ok(-f $mm_dup, 't2-mirror indexed'); ok(-f "$t2/description", 't2 description'); ok(-f "$t2/Makefile", 'convenience Makefile added (v2)'); - is(PublicInbox::Inbox::try_cat("$t2/description"), + is(PublicInbox::Git::try_cat("$t2/description"), "mirror of $http/t2/\n", 'description set'); $tb = PublicInbox::Msgmap->new_file($mm_dup)->created_at; is($tb, $created{v2}, 'created_at matched in v2 mirror'); @@ -199,14 +199,14 @@ $td->join; my $exp = "mirror of https://example.com/src/\n"; my $f = "$tmpdir/description"; PublicInbox::LeiMirror::set_description($mrr); - is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on ENOENT'); + is(PublicInbox::Git::try_cat($f), $exp, 'description set on ENOENT'); my $fh; (open($fh, '>', $f) and close($fh)) or xbail $!; PublicInbox::LeiMirror::set_description($mrr); - 
is(PublicInbox::Inbox::try_cat($f), $exp, 'description set on empty'); + is(PublicInbox::Git::try_cat($f), $exp, 'description set on empty'); (open($fh, '>', $f) and print $fh "x\n" and close($fh)) or xbail $!; - is(PublicInbox::Inbox::try_cat($f), "x\n", + is(PublicInbox::Git::try_cat($f), "x\n", 'description preserved if non-default'); }
This will allow it to easily map a single coderepo to multiple inboxes (or multiple coderepos to any number of inboxes). For now, this is just a summary, but $REPO/$OID/s/ support will be added, along with archive downloads. Indexing of coderepos will probably be supported via -extindex, only. --- MANIFEST | 1 + lib/PublicInbox/Cgit.pm | 14 +-- lib/PublicInbox/Config.pm | 2 +- lib/PublicInbox/Git.pm | 14 ++- lib/PublicInbox/GitAsyncCat.pm | 17 +-- lib/PublicInbox/WWW.pm | 12 ++- lib/PublicInbox/WwwCoderepo.pm | 185 +++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwStream.pm | 16 ++- t/solver_git.t | 33 +++++- 9 files changed, 266 insertions(+), 28 deletions(-) create mode 100644 lib/PublicInbox/WwwCoderepo.pm diff --git a/MANIFEST b/MANIFEST index 35382d2d..cf6d97e1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -342,6 +342,7 @@ lib/PublicInbox/Watch.pm lib/PublicInbox/WwwAltId.pm lib/PublicInbox/WwwAtomStream.pm lib/PublicInbox/WwwAttach.pm +lib/PublicInbox/WwwCoderepo.pm lib/PublicInbox/WwwHighlight.pm lib/PublicInbox/WwwListing.pm lib/PublicInbox/WwwStatic.pm diff --git a/lib/PublicInbox/Cgit.pm b/lib/PublicInbox/Cgit.pm index a63f8902..1112d9f8 100644 --- a/lib/PublicInbox/Cgit.pm +++ b/lib/PublicInbox/Cgit.pm @@ -7,6 +7,7 @@ package PublicInbox::Cgit; use v5.12; +use parent qw(PublicInbox::WwwCoderepo); use PublicInbox::GitHTTPBackend; use PublicInbox::Git; # not bothering with Exporter for a one-off @@ -52,10 +53,6 @@ sub locate_cgit ($) { sub new { my ($class, $pi_cfg) = @_; my ($cgit_bin, $cgit_data) = locate_cgit($pi_cfg); - # TODO: support gitweb and other repository viewers? 
- if (defined(my $cgitrc = $pi_cfg->{-cgitrc_unparsed})) { - $pi_cfg->parse_cgitrc($cgitrc, 0); - } my $self = bless { cmd => [ $cgit_bin ], cgit_data => $cgit_data, @@ -63,14 +60,7 @@ sub new { }, $class; # some cgit repos may not be mapped to inboxes, so ensure those exist: - my $code_repos = $pi_cfg->{-code_repos}; - for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) { - $k = substr($k, length('coderepo.'), -length('.dir')); - $code_repos->{$k} //= $pi_cfg->fill_code_repo($k); - } - while (my ($nick, $repo) = each %$code_repos) { - $self->{"\0$nick"} = $repo; - } + PublicInbox::WwwCoderepo::prepare_coderepos($self); my $s = join('|', map { quotemeta } keys %{$pi_cfg->{-cgit_static}}); $self->{static} = qr/\A($s)\z/; $self; diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index 1b5d87e2..42bd9438 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -343,7 +343,7 @@ sub fill_code_repo { $git->{cgit_url} = $cgits = _array($cgits); $self->{"$pfx.cgiturl"} = $cgits; } - + $git->{nick} = $nick; $git; } diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 2f0bb6a0..395add1f 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -463,6 +463,16 @@ sub host_prefix_url ($$) { "$scheme://$host_port". ($env->{SCRIPT_NAME} || '/') . $url; } +sub base_url { # for coderepos, PSGI-only + my ($self, $env) = @_; # env - PSGI env + my $url = host_prefix_url($env, ''); + # for mount in Plack::Builder + $url .= '/' if substr($url, -1, 1) ne '/'; + $url . $self->{nick} . '/'; +} + +sub isrch {} # TODO + sub pub_urls { my ($self, $env) = @_; if (my $urls = $self->{cgit_url}) { @@ -518,11 +528,11 @@ sub description { } sub cloneurl { - my ($self) = @_; + my ($self, $env) = @_; $self->{cloneurl} // do { my @urls = split(/\s+/s, try_cat("$self->{git_dir}/cloneurl")); scalar(@urls) ? 
($self->{cloneurl} = \@urls) : undef; - } // []; + } // [ substr(base_url($self, $env), 0, -1) ]; } # for grokmirror, which doesn't read gitweb.description diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm index b32c2fd3..613dbf7e 100644 --- a/lib/PublicInbox/GitAsyncCat.pm +++ b/lib/PublicInbox/GitAsyncCat.pm @@ -45,6 +45,16 @@ sub event_step { } } +sub watch_cat { + my ($git) = @_; + $git->{async_cat} //= do { + my $self = bless { git => $git }, __PACKAGE__; + $git->{in}->blocking(0); + $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET); + \undef; # this is a true ref() + }; +} + sub ibx_async_cat ($$$$) { my ($ibx, $oid, $cb, $arg) = @_; my $git = $ibx->{git} // $ibx->git; @@ -60,12 +70,7 @@ sub ibx_async_cat ($$$$) { \undef; } else { # read-only end of git-cat-file pipe $git->cat_async($oid, $cb, $arg); - $git->{async_cat} //= do { - my $self = bless { git => $git }, __PACKAGE__; - $git->{in}->blocking(0); - $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET); - \undef; # this is a true ref() - }; + watch_cat($git); } } diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 1df5572d..d0e20fb5 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -197,7 +197,9 @@ sub news_cgit_fallback ($) { my $www = $ctx->{www}; my $env = $ctx->{env}; my $res = $www->news_www->call($env); - $res->[0] == 404 ? 
$www->cgit->call($env) : $res; + $res = $www->cgit->call($env) if $res->[0] == 404; + $res = $www->coderepo->srv($ctx) if $res->[0] == 404; + $res; } # returns undef if valid, array ref response if invalid @@ -494,6 +496,14 @@ sub cgit { } } +sub coderepo { + my ($self) = @_; + $self->{coderepo} //= do { + require PublicInbox::WwwCoderepo; + PublicInbox::WwwCoderepo->new($self->{pi_cfg}); + } +} + # GET $INBOX/manifest.js.gz sub get_inbox_manifest ($$$) { my ($ctx, $inbox, $key) = @_; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm new file mode 100644 index 00000000..4b1a4f9b --- /dev/null +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -0,0 +1,185 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Standalone code repository viewer for users w/o cgit +package PublicInbox::WwwCoderepo; +use v5.12; +use File::Temp 0.19 (); # newdir +use PublicInbox::ViewVCS; +use PublicInbox::WwwStatic qw(r); +use PublicInbox::GitHTTPBackend; +use PublicInbox::Git; +use PublicInbox::GitAsyncCat; +use PublicInbox::WwwStream; +use PublicInbox::Hval qw(ascii_html); + +my $EACH_REF = "git for-each-ref --sort=-creatordate --format='%(HEAD)%00". + join('%00', map { "%($_)" } + qw(objectname refname:short subject creatordate:short))."'"; + +# shared with PublicInbox::Cgit +sub prepare_coderepos { + my ($self) = @_; + my $pi_cfg = $self->{pi_cfg}; + + # TODO: support gitweb and other repository viewers? 
+ if (defined(my $cgitrc = $pi_cfg->{-cgitrc_unparsed})) { + $pi_cfg->parse_cgitrc($cgitrc, 0); + } + my $code_repos = $pi_cfg->{-code_repos}; + for my $k (grep(/\Acoderepo\.(?:.+)\.dir\z/, keys %$pi_cfg)) { + $k = substr($k, length('coderepo.'), -length('.dir')); + $code_repos->{$k} //= $pi_cfg->fill_code_repo($k); + } + while (my ($nick, $repo) = each %$code_repos) { + $self->{"\0$nick"} = $repo; + } +} + +sub new { + my ($cls, $pi_cfg) = @_; + my $self = bless { pi_cfg => $pi_cfg }, $cls; + prepare_coderepos($self); + $self->{$_} = 10 for qw(summary_branches summary_tags); + $self->{$_} = 10 for qw(summary_log); + $self; +} + +sub summary_finish { + my ($ctx) = @_; + my $wcb = delete($ctx->{env}->{'qspawn.wcb'}) or return; # already done + my @x = split(/\n\n/sm, delete($ctx->{-each_refs})); + PublicInbox::WwwStream::html_init($ctx); + my $zfh = $ctx->zfh; + + # git log + my @r = split(/\n/s, pop(@x) // ''); + my $last = pop(@r) if scalar(@r) > $ctx->{wcr}->{summary_log}; + print $zfh '<pre><a id=log>$</a> '. + "git log --pretty=format:'%h %s (%cs)%d'\n"; + for (@r) { + my $d; # decorations + s/^ \(([^\)]+)\)// and $d = $1; + substr($_, 0, 1, ''); + my ($H, $h, $cs, $s) = split(/ /, $_, 4); + print $zfh "<a\nhref=./$H/s/>$h</a> ", ascii_html($s), + " (", $cs, ")\n"; + print $zfh "\t(", ascii_html($d), ")\n" if $d; + } + print $zfh "# no commits, yet\n" if !@r; + print $zfh "...\n" if $last; + + # README + my ($bref, $oid, $ref_path) = @{delete $ctx->{-readme}}; + if ($bref) { + my $l = PublicInbox::Linkify->new; + $$bref =~ s/\s*\z//sm; + print $zfh "\n<a id=readme>\$</a> " . + "git cat-file blob <a href=./$oid/s/>", + ascii_html($ref_path), "</a>\n", + $l->to_html($$bref), '</pre><hr><pre>'; + } + + # refs/heads + print $zfh "<a id=heads># heads (aka `branches'):</a>\n\$ " . + "git for-each-ref --sort=-creatordate refs/heads" . + " \\\n\t--format='%(HEAD) ". 
# no space for %(align:) hint + "%(refname:short) %(subject) (%(creatordate:short))'\n"; + @r = split(/^/sm, shift(@x) // ''); + $last = pop(@r) if scalar(@r) > $ctx->{wcr}->{summary_branches}; + for (@r) { + my ($pfx, $oid, $ref, $s, $cd) = split(/\0/); + utf8::decode($_) for ($ref, $s); + chomp $cd; + my $align = length($ref) < 12 ? ' ' x (12 - length($ref)) : ''; + print $zfh "$pfx <a\nhref=./$oid/s/>", ascii_html($ref), + "</a>$align ", ascii_html($s), " ($cd)\n"; + } + print $zfh "# no heads (branches) yet...\n" if !@r; + print $zfh "...\n" if $last; + print $zfh "\n<a id=tags># tags:</a>\n\$ " . + "git for-each-ref --sort=-creatordate refs/tags" . + " \\\n\t--format='". # no space for %(align:) hint + "%(refname:short) %(subject) (%(creatordate:short))'\n"; + @r = split(/^/sm, shift(@x) // ''); + $last = pop(@r) if scalar(@r) > $ctx->{wcr}->{summary_tags}; + for (@r) { + my (undef, $oid, $ref, $s, $cd) = split(/\0/); + utf8::decode($_) for ($ref, $s); + chomp $cd; + my $align = length($ref) < 12 ? ' ' x (12 - length($ref)) : ''; + print $zfh "<a\nhref=./$oid/s/>", ascii_html($ref), + "</a>$align ", ascii_html($s), " ($cd)\n"; + } + print $zfh "# no tags yet...\n" if !@r; + print $zfh "...\n" if $last; + $wcb->($ctx->html_done('</pre>')); +} + +sub capture_refs ($$) { # psgi_qx callback to capture git-for-each-ref + git-log + my ($bref, $ctx) = @_; + my $qsp_err = delete $ctx->{-qsp_err}; + $ctx->{-each_refs} = $$bref; + summary_finish($ctx) if $ctx->{-readme}; +} + +sub set_readme { # git->cat_async callback + my ($bref, $oid, $type, $size, $ctx) = @_; + my $ref_path = shift @{$ctx->{-nr_readme_tries}}; # e.g. HEAD:README + if ($type eq 'blob' && !$ctx->{-readme}) { + $ctx->{-readme} = [ $bref, $oid, $ref_path ]; + } elsif (scalar @{$ctx->{-nr_readme_tries}} == 0) { + $ctx->{-readme} //= []; # nothing left to try + } # or try another README... 
+ summary_finish($ctx) if $ctx->{-each_refs} && $ctx->{-readme}; +} + +sub summary { + my ($self, $ctx) = @_; + $ctx->{wcr} = $self; + my $nb = $self->{summary_branches} + 1; + my $nt = $self->{summary_tags} + 1; + my $nl = $self->{summary_log} + 1; + my $qsp = PublicInbox::Qspawn->new([qw(/bin/sh -c), + "$EACH_REF --count=$nb refs/heads; echo && " . + "$EACH_REF --count=$nt refs/tags; echo && " . + "git log -$nl --pretty=format:'%d %H %h %cs %s' --" ], + { GIT_DIR => $ctx->{git}->{git_dir} }); + $qsp->{qsp_err} = \($ctx->{-qsp_err} = ''); + my @try = qw(HEAD:README HEAD:README.md); # TODO: configurable + $ctx->{-nr_readme_tries} = [ @try ]; + $ctx->{git}->cat_async($_, \&set_readme, $ctx) for @try; + if ($ctx->{env}->{'pi-httpd.async'}) { + PublicInbox::GitAsyncCat::watch_cat($ctx->{git}); + } else { # synchronous + $ctx->{git}->cat_async_wait; + } + sub { # $_[0] => PublicInbox::HTTP::{Identity,Chunked} + $ctx->{env}->{'qspawn.wcb'} = $_[0]; + $qsp->psgi_qx($ctx->{env}, undef, \&capture_refs, $ctx); + } +} + +sub srv { # endpoint called by PublicInbox::WWW + my ($self, $ctx) = @_; + my $path_info = $ctx->{env}->{PATH_INFO}; + my $git; + # handle clone requests + if ($path_info =~ m!\A/(.+?)/($PublicInbox::GitHTTPBackend::ANY)\z!x) { + $git = $self->{"\0$1"} and return + PublicInbox::GitHTTPBackend::serve($ctx->{env},$git,$2); + } + $path_info =~ m!\A/(.+?)/\z! and + ($ctx->{git} = $self->{"\0$1"}) and return summary($self, $ctx); + if ($path_info =~ m!\A/(.+?)\z! 
and ($git = $self->{"\0$1"})) { + my $qs = $ctx->{env}->{QUERY_STRING}; + my $url = $git->base_url($ctx->{env}); + $url .= "?$qs" if $qs ne ''; + [ 301, [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] ]; + } else { + r(404); + } +} + +1; diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 16442d51..92d243eb 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -18,7 +18,7 @@ https://public-inbox.org/public-inbox.git) ]; sub base_url ($) { my $ctx = shift; - my $base_url = $ctx->{ibx}->base_url($ctx->{env}); + my $base_url = ($ctx->{ibx} // $ctx->{git})->base_url($ctx->{env}); chop $base_url; # no trailing slash for clone $base_url; } @@ -40,7 +40,7 @@ sub async_eml { # for async_blob_cb sub html_top ($) { my ($ctx) = @_; - my $ibx = $ctx->{ibx}; + my $ibx = $ctx->{ibx} // $ctx->{git}; my $desc = ascii_html($ibx->description); my $title = delete($ctx->{-title_html}) // $desc; my $upfx = $ctx->{-upfx} || ''; @@ -84,8 +84,11 @@ sub html_top ($) { '</head><body>'. $top . (delete($ctx->{-html_tip}) // ''); } +sub inboxes { () } # TODO + sub coderepos ($) { my ($ctx) = @_; + $ctx->{ibx} // return inboxes($ctx); my $cr = $ctx->{ibx}->{coderepo} // return (); my $cfg = $ctx->{www}->{pi_cfg}; my $upfx = ($ctx->{-upfx} // ''). '../'; @@ -114,8 +117,8 @@ sub _html_end { my ($ctx) = @_; my $upfx = $ctx->{-upfx} || ''; my $m = "${upfx}_/text/mirror/"; - my $x; - if ($ctx->{ibx}->can('cloneurl')) { + my $x = ''; + if ($ctx->{ibx} && $ctx->{ibx}->can('cloneurl')) { $x = <<EOF; This is a public inbox, see <a href="$m">mirroring instructions</a> @@ -139,12 +142,15 @@ as well as URLs for IMAP folder(s). EOM } } - } else { + } elsif ($ctx->{ibx}) { # extindex $x = <<EOF; This is an external index of several public inboxes, see <a href="$m">mirroring instructions</a> on how to clone and mirror all data and code used by this external index. 
EOF + } elsif ($ctx->{git}) { # coderepo + $x = join('', map { "git clone $_\n" } + @{$ctx->{git}->cloneurl($ctx->{env})}); } chomp $x; '<hr><pre>'.join("\n\n", coderepos($ctx), $x).'</pre></body></html>' diff --git a/t/solver_git.t b/t/solver_git.t index e347c711..d6936c47 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -9,7 +9,9 @@ require_git(2.6); use PublicInbox::ContentHash qw(git_sha); use PublicInbox::Spawn qw(popen_rd); require_mods(qw(DBD::SQLite Search::Xapian Plack::Util)); -my $git_dir = xqx([qw(git rev-parse --git-dir)], undef, {2 => \(my $null)}); +my $rdr = { 2 => \(my $null) }; +my $git_dir = xqx([qw(git rev-parse --git-common-dir)], undef, $rdr); +$git_dir = xqx([qw(git rev-parse --git-dir)], undef, $rdr) if $? != 0; $? == 0 or plan skip_all => "$0 must be run from a git working tree"; chomp $git_dir; @@ -300,6 +302,35 @@ EOF is($res->code, 200, 'shows commit w/ utf8.eml'); like($res->content, qr/Eléanor/, 'UTF-8 commit shown properly'); + + # WwwCoderepo + my $olderr; + if (defined $ENV{PLACK_TEST_EXTERNALSERVER_URI}) { + ok(!-s "$tmpdir/stderr.log", + 'nothing in stderr.log, yet'); + } else { + open $olderr, '>&', \*STDERR or xbail "open: $!"; + open STDERR, '+>>', "$tmpdir/stderr.log" or + xbail "open: $!"; + } + $res = $cb->(GET('/binfoo/')); + defined($ENV{PLACK_TEST_EXTERNALSERVER_URI}) or + open STDERR, '>&', $olderr or xbail "open: $!"; + is($res->code, 200, 'coderepo summary (binfoo)'); + if (ok(-s "$tmpdir/stderr.log")) { + open my $fh, '<', "$tmpdir/stderr.log" or xbail $!; + my $s = do { local $/; <$fh> }; + open $fh, '>', "$tmpdir/stderr.log" or xbail $!; + ok($s =~ s/^fatal: your current branch.*?\n//sm, + 'got current branch warning'); + ok($s =~ s/^.*? 
exit status=[1-9]+ .*?\n//sm, + 'got exit status warning'); + is($s, '', 'no unexpected warnings on empty coderepo'); + } + $res = $cb->(GET('/public-inbox/')); + is($res->code, 200, 'coderepo summary (public-inbox)'); + $res = $cb->(GET('/public-inbox')); + is($res->code, 301, 'redirected'); }; test_psgi(sub { $www->call(@_) }, $client); my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };
Just reusing ViewVCS::show, since encoding refname and pathnames into things just makes things slower. --- lib/PublicInbox/SolverGit.pm | 8 ++++---- lib/PublicInbox/ViewVCS.pm | 1 + lib/PublicInbox/WwwCoderepo.pm | 4 ++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/SolverGit.pm b/lib/PublicInbox/SolverGit.pm index b723b48a..80bb0a17 100644 --- a/lib/PublicInbox/SolverGit.pm +++ b/lib/PublicInbox/SolverGit.pm @@ -639,7 +639,7 @@ sub resolve_patch ($$) { # scan through inboxes to look for emails which results in # the oid we want: - my $ibx = shift(@{$want->{try_ibxs}}) or die 'BUG: {try_ibxs} empty'; + my $ibx = shift(@{$want->{try_ibxs}}) or return done($self, undef); if (my $msgs = find_smsgs($self, $ibx, $want)) { $want->{try_smsgs} = $msgs; $want->{cur_ibx} = $ibx; @@ -654,14 +654,14 @@ sub resolve_patch ($$) { sub new { my ($class, $ibx, $user_cb, $uarg) = @_; - bless { - gits => $ibx->{-repo_objs}, + bless { # $ibx is undef if coderepo only (see WwwCoderepo) + gits => $ibx ? $ibx->{-repo_objs} : undef, user_cb => $user_cb, uarg => $uarg, # -cur_di, -qsp_err, -msg => temp fields for Qspawn callbacks # TODO: config option for searching related inboxes - inboxes => [ $ibx ], + inboxes => $ibx ? 
[ $ibx ] : [], }, $class; } diff --git a/lib/PublicInbox/ViewVCS.pm b/lib/PublicInbox/ViewVCS.pm index b0f58455..6ada03e6 100644 --- a/lib/PublicInbox/ViewVCS.pm +++ b/lib/PublicInbox/ViewVCS.pm @@ -484,6 +484,7 @@ sub show ($$;$) { open $ctx->{lh}, '+>>', "$ctx->{-tmp}/solve.log" or die "open: $!"; my $solver = PublicInbox::SolverGit->new($ctx->{ibx}, \&solve_result, $ctx); + $solver->{gits} //= [ $ctx->{git} ]; $solver->{tmp} = $ctx->{-tmp}; # share tmpdir # PSGI server will call this immediately and give us a callback (-wcb) sub { diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index 4b1a4f9b..e0fc9045 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -171,6 +171,10 @@ sub srv { # endpoint called by PublicInbox::WWW } $path_info =~ m!\A/(.+?)/\z! and ($ctx->{git} = $self->{"\0$1"}) and return summary($self, $ctx); + $path_info =~ m!\A/(.+?)/([a-f0-9]+)/s/\z! and + ($ctx->{git} = $self->{"\0$1"}) and + return PublicInbox::ViewVCS::show($ctx, $2); + if ($path_info =~ m!\A/(.+?)\z! and ($git = $self->{"\0$1"})) { my $qs = $ctx->{env}->{QUERY_STRING}; my $url = $git->base_url($ctx->{env});
Git->local_nick will be used directly (outside of ->pub_urls) in the standalone coderepo viewer for tarball snapshots, so let it return undef instead of '???' when no nick can be derived from git_dir; ->pub_urls keeps the '???' fallback for display. --- lib/PublicInbox/Git.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 395add1f..691462ed 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -451,7 +451,7 @@ sub DESTROY { cleanup(@_) } sub local_nick ($) { # don't show full FS path, basename should be OK: - $_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ? "$1.git" : '???'; + $_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ? "$1.git" : undef; } sub host_prefix_url ($$) { @@ -478,7 +478,7 @@ sub pub_urls { if (my $urls = $self->{cgit_url}) { return map { host_prefix_url($env, $_) } @$urls; } - (local_nick($self)); + (local_nick($self) // '???'); } sub cat_async_begin {
These should be compatible with cgit results --- MANIFEST | 1 + lib/PublicInbox/Git.pm | 1 + lib/PublicInbox/GitAsyncCat.pm | 49 +++++++++++++++-- lib/PublicInbox/RepoSnapshot.pm | 95 +++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwCoderepo.pm | 8 +++ t/solver_git.t | 20 +++++++ 6 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 lib/PublicInbox/RepoSnapshot.pm diff --git a/MANIFEST b/MANIFEST index cf6d97e1..29f368de 100644 --- a/MANIFEST +++ b/MANIFEST @@ -306,6 +306,7 @@ lib/PublicInbox/PktOp.pm lib/PublicInbox/ProcessPipe.pm lib/PublicInbox/Qspawn.pm lib/PublicInbox/Reply.pm +lib/PublicInbox/RepoSnapshot.pm lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/SaPlugin/ListMirror.pod lib/PublicInbox/Search.pm diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 691462ed..2ed3a29b 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -426,6 +426,7 @@ sub cleanup { scalar(@{$self->{inflight} // []})); local $in_cleanup = 1; delete $self->{async_cat}; + delete $self->{async_chk}; async_wait_all($self); delete $self->{inflight}; delete $self->{inflight_c}; diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm index 613dbf7e..2e0725a6 100644 --- a/lib/PublicInbox/GitAsyncCat.pm +++ b/lib/PublicInbox/GitAsyncCat.pm @@ -1,14 +1,14 @@ -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # internal class used by PublicInbox::Git + PublicInbox::DS # This parses the output pipe of "git cat-file --batch" package PublicInbox::GitAsyncCat; -use strict; +use v5.12; use parent qw(PublicInbox::DS Exporter); use POSIX qw(WNOHANG); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); -our @EXPORT = qw(ibx_async_cat ibx_async_prefetch); +our @EXPORT = qw(ibx_async_cat ibx_async_prefetch async_check); use PublicInbox::Git (); our $GCF2C; # singleton PublicInbox::Gcf2Client @@ 
-74,6 +74,18 @@ sub ibx_async_cat ($$$$) { } } +sub async_check ($$$$) { + my ($ibx, $oidish, $cb, $arg) = @_; + my $git = $ibx->{git} // $ibx->git; + $git->check_async($oidish, $cb, $arg); + $git->{async_chk} //= do { + my $self = bless { git => $git }, 'PublicInbox::GitAsyncCheck'; + $git->{in_c}->blocking(0); + $self->SUPER::new($git->{in_c}, EPOLLIN|EPOLLET); + \undef; # this is a true ref() + }; +} + # this is safe to call inside $cb, but not guaranteed to enqueue # returns true if successful, undef if not. For fairness, we only # prefetch if there's no in-flight requests. @@ -96,3 +108,34 @@ sub ibx_async_prefetch { } 1; +package PublicInbox::GitAsyncCheck; +use v5.12; +our @ISA = qw(PublicInbox::GitAsyncCat); +use POSIX qw(WNOHANG); +use PublicInbox::Syscall qw(EPOLLIN EPOLLET); + +sub event_step { + my ($self) = @_; + my $git = $self->{git} or return; + return $self->close if ($git->{in_c} // 0) != ($self->{sock} // 1); + my $inflight = $git->{inflight_c}; + if ($inflight && @$inflight) { + $git->check_async_step($inflight); + + # child death? 
+ if (($git->{in_c} // 0) != ($self->{sock} // 1)) { + $self->close; + } elsif (@$inflight || exists $git->{rbuf_c}) { + # ok, more to do, requeue for fairness + $self->requeue; + } + } elsif ((my $pid = waitpid($git->{pid_c}, WNOHANG)) > 0) { + # May happen if the child process is killed by a BOFH + # (or segfaults) + delete $git->{pid_c}; + warn "E: git $pid exited with \$?=$?\n"; + $self->close; + } +} + +1; diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm new file mode 100644 index 00000000..460340e6 --- /dev/null +++ b/lib/PublicInbox/RepoSnapshot.pm @@ -0,0 +1,95 @@ +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# cgit-compatible /snapshot/ endpoint for WWW coderepos +package PublicInbox::RepoSnapshot; +use v5.12; +use PublicInbox::Git; +use PublicInbox::Qspawn; +use PublicInbox::GitAsyncCat; +use PublicInbox::WwwStatic qw(r); + +# Not using standard mime types since the compressed tarballs are +# special or do not match my /etc/mime.types. Choose what gitweb +# and cgit agree on for compatibility. +our %FMT_TYPES = ( + 'tar' => 'application/x-tar', + 'tar.gz' => 'application/x-gzip', + 'tar.bz2' => 'application/x-bzip2', + 'tar.xz' => 'application/x-xz', + 'zip' => 'application/x-zip', +); + +our %FMT_CFG = ( + 'tar.xz' => 'xz -c', + 'tar.bz2' => 'bzip2 -c', + # not supporting lz nor zstd for now to avoid format proliferation + # and increased cache overhead required to handle extra formats. 
+); + +my $SUFFIX = join('|', map { quotemeta } keys %FMT_TYPES); + +# TODO deal with tagged blobs + +sub archive_hdr { # parse_hdr for Qspawn + my ($r, $bref, $ctx) = @_; + $r or return [500, [qw(Content-Type text/plain Content-Length 0)], []]; + my $fn = "$ctx->{snap_pfx}.$ctx->{snap_fmt}"; + my $type = $FMT_TYPES{$ctx->{snap_fmt}} // + die "BUG: bad fmt: $ctx->{snap_fmt}"; + [ 200, [ 'Content-Type', "$type; charset=UTF-8", + 'Content-Disposition', qq(inline; filename="$fn"), + 'ETag', qq("$ctx->{etag}") ] ]; +} + +sub archive_cb { + my ($ctx) = @_; + my @cfg; + if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) { + @cfg = ('-c', "tar.$ctx->{snap_fmt}.command=$cmd"); + } + my $qsp = PublicInbox::Qspawn->new(['git', @cfg, + "--git-dir=$ctx->{git}->{git_dir}", 'archive', + "--prefix=$ctx->{snap_pfx}/", + "--format=$ctx->{snap_fmt}", $ctx->{treeish}]); + $qsp->psgi_return($ctx->{env}, undef, \&archive_hdr, $ctx); +} + +sub ver_check { # git->check_async callback + my ($oid, $type, $size, $ctx) = @_; + if ($type eq 'missing') { # try 'v' and 'V' prefixes + my $pfx = shift @{$ctx->{try_pfx}} or return + delete($ctx->{env}->{'qspawn.wcb'})->(r(404)); + my $v = $ctx->{treeish} = $pfx.$ctx->{snap_ver}; + return $ctx->{env}->{'pi-httpd.async'} ? + async_check($ctx, $v, \&ver_check, $ctx) : + $ctx->{git}->check_async($v, \&ver_check, $ctx); + } + $ctx->{etag} = $oid; + archive_cb($ctx); +} + +sub srv { + my ($ctx, $fn) = @_; + return if $fn =~ /["\s]/s; + $fn =~ s/\.($SUFFIX)\z//o or return; + $ctx->{snap_fmt} = $1; + my $pfx = $ctx->{git}->local_nick // return; + $pfx =~ s/(?:\.git)?\z/-/; + substr($fn, 0, length($pfx)) eq $pfx or return; + $ctx->{snap_pfx} = $fn; + my $v = $ctx->{snap_ver} = substr($fn, length($pfx), length($fn)); + $ctx->{treeish} = $v; # try without [vV] prefix, first + @{$ctx->{try_pfx}} = qw(v V); # cf. 
cgit:ui-snapshot.c + sub { + $ctx->{env}->{'qspawn.wcb'} = $_[0]; + if ($ctx->{env}->{'pi-httpd.async'}) { + async_check($ctx, $v, \&ver_check, $ctx); + } else { + $ctx->{git}->check_async($v, \&ver_check, $ctx); + $ctx->{git}->check_async_wait; + } + } +} + +1; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index e0fc9045..fb510b28 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -175,6 +175,14 @@ sub srv { # endpoint called by PublicInbox::WWW ($ctx->{git} = $self->{"\0$1"}) and return PublicInbox::ViewVCS::show($ctx, $2); + # snapshots: + if ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and + ($ctx->{git} = $self->{"\0$1"})) { + require PublicInbox::RepoSnapshot; + return PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404); + } + + # enforce trailing slash: if ($path_info =~ m!\A/(.+?)\z! and ($git = $self->{"\0$1"})) { my $qs = $ctx->{env}->{QUERY_STRING}; my $url = $git->base_url($ctx->{env}); diff --git a/t/solver_git.t b/t/solver_git.t index d6936c47..71b9554a 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -34,6 +34,7 @@ File::Path::mkpath([map { $md.$_ } (qw(/ /cur /new /tmp))]); symlink(abs_path('t/solve/0001-simple-mod.patch'), "$md/cur/foo:2,") or xbail "symlink: $!"; +my $v1_0_0_rev = '8a918a8523bc9904123460f85999d75f6d604916'; my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; @@ -331,6 +332,25 @@ EOF is($res->code, 200, 'coderepo summary (public-inbox)'); $res = $cb->(GET('/public-inbox')); is($res->code, 301, 'redirected'); + + my $fn = 'public-inbox-1.0.0.tar.gz'; + $res = $cb->(GET("/public-inbox/snapshot/$fn")); + is($res->code, 200, 'tar.gz snapshot'); + is($res->header('Content-Disposition'), + qq'inline; filename="$fn"', 'c-d header'); + is($res->header('ETag'), qq'"$v1_0_0_rev"', 'etag header'); + my $exp = xqx([qw(git archive --format=tar.gz + 
--prefix=public-inbox-1.0.0/ v1.0.0)], + { GIT_DIR => $git_dir }); + my $got = $res->content; + is(length($got), length($exp), + "length matches installed `git archive' output") and + is(git_sha(1, \$got)->hexdigest, git_sha(1, \$exp)->hexdigest, + "content matches installed `git archive' output"); + + $fn = 'public-inbox-1.0.2.tar.gz'; + $res = $cb->(GET("/public-inbox/snapshot/$fn")); + is($res->code, 404, '404 on non-existent tag'); }; test_psgi(sub { $www->call(@_) }, $client); my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };
This is already used by */$OID/s/, so just reuse existing code and make git->local_nick use the assigned nick from the config file, if there is one. --- lib/PublicInbox/Git.pm | 3 ++- lib/PublicInbox/WwwStream.pm | 17 +++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 2ed3a29b..882a9a4a 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -452,7 +452,8 @@ sub DESTROY { cleanup(@_) } sub local_nick ($) { # don't show full FS path, basename should be OK: - $_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ? "$1.git" : undef; + $_[0]->{nick} // ($_[0]->{git_dir} =~ m!/([^/]+?)(?:/*\.git/*)?\z! ? + "$1.git" : undef); } sub host_prefix_url ($$) { diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 92d243eb..ccac5e8b 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -98,16 +98,13 @@ sub coderepos ($) { my @ret = ('<a id=code>' . 'Code repositories for project(s) associated with this '. $ctx->{ibx}->thing_type . "\n"); - for my $cr_name (@$cr) { - my $urls = $cfg->get_all("coderepo.$cr_name.cgiturl"); - if ($urls) { - for (@$urls) { - my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $pfx.$_; - $u = ascii_html(prurl($ctx->{env}, $u)); - $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>); - } - } else { - $ret[0] .= qq[\n\t$cr_name.git (no URL configured)]; + my $objs = $cfg->repo_objs($ctx->{ibx}); + for my $git (@$objs) { + my @urls = $git->pub_urls; + for (@urls) { + my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $pfx.$_; + $u = ascii_html(prurl($ctx->{env}, $u)); + $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>); } } @ret; # may be empty, this sub is called as an arg for join()
This needs to be expanded, but quick links to heads/tags/README shouldn't hurt... --- lib/PublicInbox/WwwCoderepo.pm | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index fb510b28..6c119b28 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -55,8 +55,16 @@ sub summary_finish { # git log my @r = split(/\n/s, pop(@x) // ''); my $last = pop(@r) if scalar(@r) > $ctx->{wcr}->{summary_log}; - print $zfh '<pre><a id=log>$</a> '. - "git log --pretty=format:'%h %s (%cs)%d'\n"; + print $zfh <<EOM; +<pre> +<a +href='#readme'>about</a> <a +href='#heads'>heads</a> <a +href='#tags'>tags</a> + +<a +id=log>\$</a> git log --pretty=format:'%h %s (%cs)%d' +EOM for (@r) { my $d; # decorations s/^ \(([^\)]+)\)// and $d = $1;
Pass the PSGI env to ->pub_urls when listing coderepos. This ensures URLs are generated properly and avoids undefined variable warnings. --- Will squash this into 9/10 <20221004191240.1056304-10-e@80x24.org> lib/PublicInbox/WwwStream.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index ccac5e8b..f5b4df9f 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -100,7 +100,7 @@ sub coderepos ($) { $ctx->{ibx}->thing_type . "\n"); my $objs = $cfg->repo_objs($ctx->{ibx}); for my $git (@$objs) { - my @urls = $git->pub_urls; + my @urls = $git->pub_urls($ctx->{env}); for (@urls) { my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $pfx.$_; $u = ascii_html(prurl($ctx->{env}, $u));