From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9D6FD1F52B for ; Tue, 4 Oct 2022 19:12:41 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1664910761; bh=SUZE6L54d8mGtTThEsG8IfngQXBjMSd6q8OZ49ynZsg=; h=From:To:Subject:Date:In-Reply-To:References:From; b=p0OqzDDPxA+o/UOK4GAzL0QAht1OFZIINZRCwiSpo+5n4Sj6rsosEbnb06ZCZrrp+ oidK01YIQ7sPvPq7r5xASBpZ9e0Ero+ei5dsUTBEUhQhFUY8gecCr9OQTedX0JQbd2 LtlLP5nga85w3/6lPe1jlr4mvKP1XiiYID/G3HIA= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/10] www_coderepo: wire up snapshot support Date: Tue, 4 Oct 2022 19:12:38 +0000 Message-Id: <20221004191240.1056304-9-e@80x24.org> In-Reply-To: <20221004191240.1056304-1-e@80x24.org> References: <20221004191240.1056304-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: These should be compatible with cgit results --- MANIFEST | 1 + lib/PublicInbox/Git.pm | 1 + lib/PublicInbox/GitAsyncCat.pm | 49 +++++++++++++++-- lib/PublicInbox/RepoSnapshot.pm | 95 +++++++++++++++++++++++++++++++++ lib/PublicInbox/WwwCoderepo.pm | 8 +++ t/solver_git.t | 20 +++++++ 6 files changed, 171 insertions(+), 3 deletions(-) create mode 100644 lib/PublicInbox/RepoSnapshot.pm diff --git a/MANIFEST b/MANIFEST index cf6d97e1..29f368de 100644 --- a/MANIFEST +++ b/MANIFEST @@ -306,6 +306,7 @@ lib/PublicInbox/PktOp.pm lib/PublicInbox/ProcessPipe.pm lib/PublicInbox/Qspawn.pm lib/PublicInbox/Reply.pm +lib/PublicInbox/RepoSnapshot.pm lib/PublicInbox/SaPlugin/ListMirror.pm lib/PublicInbox/SaPlugin/ListMirror.pod lib/PublicInbox/Search.pm diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 691462ed..2ed3a29b 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -426,6 +426,7 @@ sub cleanup { scalar(@{$self->{inflight} // []})); local $in_cleanup = 1; delete $self->{async_cat}; + delete $self->{async_chk}; async_wait_all($self); delete $self->{inflight}; delete $self->{inflight_c}; diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm index 613dbf7e..2e0725a6 100644 --- a/lib/PublicInbox/GitAsyncCat.pm +++ b/lib/PublicInbox/GitAsyncCat.pm @@ -1,14 +1,14 @@ -# Copyright (C) 2020-2021 all contributors +# Copyright (C) all contributors # License: AGPL-3.0+ # # internal class used by PublicInbox::Git + PublicInbox::DS # This parses the output pipe of "git cat-file --batch" package PublicInbox::GitAsyncCat; -use strict; +use v5.12; use parent qw(PublicInbox::DS Exporter); use POSIX qw(WNOHANG); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); -our @EXPORT = qw(ibx_async_cat ibx_async_prefetch); +our @EXPORT = qw(ibx_async_cat ibx_async_prefetch async_check); use PublicInbox::Git (); our $GCF2C; # singleton PublicInbox::Gcf2Client @@ -74,6 +74,18 @@ sub ibx_async_cat ($$$$) { } } +sub async_check ($$$$) { + my ($ibx, $oidish, $cb, $arg) = @_; + my $git = $ibx->{git} // $ibx->git; + $git->check_async($oidish, $cb, $arg); + $git->{async_chk} //= do { + my $self = bless { git => $git }, 'PublicInbox::GitAsyncCheck'; + $git->{in_c}->blocking(0); + $self->SUPER::new($git->{in_c}, EPOLLIN|EPOLLET); + \undef; # this is a true ref() + }; +} + # this is safe to call inside $cb, but not guaranteed to enqueue # returns true if successful, undef if not. For fairness, we only # prefetch if there's no in-flight requests. @@ -96,3 +108,34 @@ sub ibx_async_prefetch { } 1; +package PublicInbox::GitAsyncCheck; +use v5.12; +our @ISA = qw(PublicInbox::GitAsyncCat); +use POSIX qw(WNOHANG); +use PublicInbox::Syscall qw(EPOLLIN EPOLLET); + +sub event_step { + my ($self) = @_; + my $git = $self->{git} or return; + return $self->close if ($git->{in_c} // 0) != ($self->{sock} // 1); + my $inflight = $git->{inflight_c}; + if ($inflight && @$inflight) { + $git->check_async_step($inflight); + + # child death? + if (($git->{in_c} // 0) != ($self->{sock} // 1)) { + $self->close; + } elsif (@$inflight || exists $git->{rbuf_c}) { + # ok, more to do, requeue for fairness + $self->requeue; + } + } elsif ((my $pid = waitpid($git->{pid_c}, WNOHANG)) > 0) { + # May happen if the child process is killed by a BOFH + # (or segfaults) + delete $git->{pid_c}; + warn "E: git $pid exited with \$?=$?\n"; + $self->close; + } +} + +1; diff --git a/lib/PublicInbox/RepoSnapshot.pm b/lib/PublicInbox/RepoSnapshot.pm new file mode 100644 index 00000000..460340e6 --- /dev/null +++ b/lib/PublicInbox/RepoSnapshot.pm @@ -0,0 +1,95 @@ +# Copyright (C) all contributors +# License: AGPL-3.0+ + +# cgit-compatible /snapshot/ endpoint for WWW coderepos +package PublicInbox::RepoSnapshot; +use v5.12; +use PublicInbox::Git; +use PublicInbox::Qspawn; +use PublicInbox::GitAsyncCat; +use PublicInbox::WwwStatic qw(r); + +# Not using standard mime types since the compressed tarballs are +# special or do not match my /etc/mime.types. Choose what gitweb +# and cgit agree on for compatibility. +our %FMT_TYPES = ( + 'tar' => 'application/x-tar', + 'tar.gz' => 'application/x-gzip', + 'tar.bz2' => 'application/x-bzip2', + 'tar.xz' => 'application/x-xz', + 'zip' => 'application/x-zip', +); + +our %FMT_CFG = ( + 'tar.xz' => 'xz -c', + 'tar.bz2' => 'bzip2 -c', + # not supporting lz nor zstd for now to avoid format proliferation + # and increased cache overhead required to handle extra formats. +); + +my $SUFFIX = join('|', map { quotemeta } keys %FMT_TYPES); + +# TODO deal with tagged blobs + +sub archive_hdr { # parse_hdr for Qspawn + my ($r, $bref, $ctx) = @_; + $r or return [500, [qw(Content-Type text/plain Content-Length 0)], []]; + my $fn = "$ctx->{snap_pfx}.$ctx->{snap_fmt}"; + my $type = $FMT_TYPES{$ctx->{snap_fmt}} // + die "BUG: bad fmt: $ctx->{snap_fmt}"; + [ 200, [ 'Content-Type', "$type; charset=UTF-8", + 'Content-Disposition', qq(inline; filename="$fn"), + 'ETag', qq("$ctx->{etag}") ] ]; +} + +sub archive_cb { + my ($ctx) = @_; + my @cfg; + if (my $cmd = $FMT_CFG{$ctx->{snap_fmt}}) { + @cfg = ('-c', "tar.$ctx->{snap_fmt}.command=$cmd"); + } + my $qsp = PublicInbox::Qspawn->new(['git', @cfg, + "--git-dir=$ctx->{git}->{git_dir}", 'archive', + "--prefix=$ctx->{snap_pfx}/", + "--format=$ctx->{snap_fmt}", $ctx->{treeish}]); + $qsp->psgi_return($ctx->{env}, undef, \&archive_hdr, $ctx); +} + +sub ver_check { # git->check_async callback + my ($oid, $type, $size, $ctx) = @_; + if ($type eq 'missing') { # try 'v' and 'V' prefixes + my $pfx = shift @{$ctx->{try_pfx}} or return + delete($ctx->{env}->{'qspawn.wcb'})->(r(404)); + my $v = $ctx->{treeish} = $pfx.$ctx->{snap_ver}; + return $ctx->{env}->{'pi-httpd.async'} ? + async_check($ctx, $v, \&ver_check, $ctx) : + $ctx->{git}->check_async($v, \&ver_check, $ctx); + } + $ctx->{etag} = $oid; + archive_cb($ctx); +} + +sub srv { + my ($ctx, $fn) = @_; + return if $fn =~ /["\s]/s; + $fn =~ s/\.($SUFFIX)\z//o or return; + $ctx->{snap_fmt} = $1; + my $pfx = $ctx->{git}->local_nick // return; + $pfx =~ s/(?:\.git)?\z/-/; + substr($fn, 0, length($pfx)) eq $pfx or return; + $ctx->{snap_pfx} = $fn; + my $v = $ctx->{snap_ver} = substr($fn, length($pfx), length($fn)); + $ctx->{treeish} = $v; # try without [vV] prefix, first + @{$ctx->{try_pfx}} = qw(v V); # cf. cgit:ui-snapshot.c + sub { + $ctx->{env}->{'qspawn.wcb'} = $_[0]; + if ($ctx->{env}->{'pi-httpd.async'}) { + async_check($ctx, $v, \&ver_check, $ctx); + } else { + $ctx->{git}->check_async($v, \&ver_check, $ctx); + $ctx->{git}->check_async_wait; + } + } +} + +1; diff --git a/lib/PublicInbox/WwwCoderepo.pm b/lib/PublicInbox/WwwCoderepo.pm index e0fc9045..fb510b28 100644 --- a/lib/PublicInbox/WwwCoderepo.pm +++ b/lib/PublicInbox/WwwCoderepo.pm @@ -175,6 +175,14 @@ sub srv { # endpoint called by PublicInbox::WWW ($ctx->{git} = $self->{"\0$1"}) and return PublicInbox::ViewVCS::show($ctx, $2); + # snapshots: + if ($path_info =~ m!\A/(.+?)/snapshot/([^/]+)\z! and + ($ctx->{git} = $self->{"\0$1"})) { + require PublicInbox::RepoSnapshot; + return PublicInbox::RepoSnapshot::srv($ctx, $2) // r(404); + } + + # enforce trailing slash: if ($path_info =~ m!\A/(.+?)\z! and ($git = $self->{"\0$1"})) { my $qs = $ctx->{env}->{QUERY_STRING}; my $url = $git->base_url($ctx->{env}); diff --git a/t/solver_git.t b/t/solver_git.t index d6936c47..71b9554a 100644 --- a/t/solver_git.t +++ b/t/solver_git.t @@ -34,6 +34,7 @@ File::Path::mkpath([map { $md.$_ } (qw(/ /cur /new /tmp))]); symlink(abs_path('t/solve/0001-simple-mod.patch'), "$md/cur/foo:2,") or xbail "symlink: $!"; +my $v1_0_0_rev = '8a918a8523bc9904123460f85999d75f6d604916'; my $v1_0_0_tag = 'cb7c42b1e15577ed2215356a2bf925aef59cdd8d'; my $v1_0_0_tag_short = substr($v1_0_0_tag, 0, 16); my $expect = '69df7d565d49fbaaeb0a067910f03dc22cd52bd0'; @@ -331,6 +332,25 @@ EOF is($res->code, 200, 'coderepo summary (public-inbox)'); $res = $cb->(GET('/public-inbox')); is($res->code, 301, 'redirected'); + + my $fn = 'public-inbox-1.0.0.tar.gz'; + $res = $cb->(GET("/public-inbox/snapshot/$fn")); + is($res->code, 200, 'tar.gz snapshot'); + is($res->header('Content-Disposition'), + qq'inline; filename="$fn"', 'c-d header'); + is($res->header('ETag'), qq'"$v1_0_0_rev"', 'etag header'); + my $exp = xqx([qw(git archive --format=tar.gz + --prefix=public-inbox-1.0.0/ v1.0.0)], + { GIT_DIR => $git_dir }); + my $got = $res->content; + is(length($got), length($exp), + "length matches installed `git archive' output") and + is(git_sha(1, \$got)->hexdigest, git_sha(1, \$exp)->hexdigest, + "content matches installed `git archive' output"); + + $fn = 'public-inbox-1.0.2.tar.gz'; + $res = $cb->(GET("/public-inbox/snapshot/$fn")); + is($res->code, 404, '404 on non-existent tag'); }; test_psgi(sub { $www->call(@_) }, $client); my $env = { PI_CONFIG => $cfgpath, TMPDIR => $tmpdir };