From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] www: support cloning individual v2 git partitions
Date: Tue, 27 Mar 2018 21:27:01 +0000 [thread overview]
Message-ID: <20180327212701.11623-4-e@80x24.org> (raw)
In-Reply-To: <20180327212701.11623-1-e@80x24.org>
This will require multiple client invocations, but should reduce
load on the server and make it easier for readers to only clone
the latest data.
Unfortunately, supporting a cloneurl file for externally-hosted
repos will be more difficult as we cannot easily know if the
clones use v1 or v2 repositories, or how many git partitions
they have.
---
lib/PublicInbox/Inbox.pm | 37 ++++++++++++++++++++++++++++++++++++-
lib/PublicInbox/WWW.pm | 19 +++++++++++--------
lib/PublicInbox/WwwStream.pm | 23 +++++++++++++++++++----
t/psgi_v2.t | 5 +++++
t/view.t | 1 +
5 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index b1ea8dc..3097751 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -82,6 +82,18 @@ sub new {
bless $opts, $class;
}
+sub git_part {
+ my ($self, $part) = @_;
+ ($self->{version} || 1) == 2 or return;
+ $self->{"$part.git"} ||= eval {
+ my $git_dir = "$self->{mainrepo}/git/$part.git";
+ my $g = PublicInbox::Git->new($git_dir);
+ $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
+ # no cleanup needed, we never cat-file off this, only clone
+ $g;
+ };
+}
+
sub git {
my ($self) = @_;
$self->{git} ||= eval {
@@ -94,6 +106,29 @@ sub git {
};
}
+sub max_git_part {
+ my ($self) = @_;
+ my $v = $self->{version};
+ return unless defined($v) && $v == 2;
+ my $part = $self->{-max_git_part};
+ my $changed = git($self)->alternates_changed;
+ if (!defined($part) || $changed) {
+ $self->git->cleanup if $changed;
+ my $gits = "$self->{mainrepo}/git";
+ if (opendir my $dh, $gits) {
+ my $max = -1;
+ while (defined(my $git_dir = readdir($dh))) {
+ $git_dir =~ m!\A(\d+)\.git\z! or next;
+ $max = $1 if $1 > $max;
+ }
+ $part = $self->{-max_git_part} = $max if $max >= 0;
+ } else {
+ warn "opendir $gits failed: $!\n";
+ }
+ }
+ $part;
+}
+
sub mm {
my ($self) = @_;
$self->{mm} ||= eval {
@@ -133,7 +168,7 @@ sub description {
local $/ = "\n";
chomp $desc;
$desc =~ s/\s+/ /smg;
- $desc = '($GIT_DIR/description missing)' if $desc eq '';
+ $desc = '($REPO_DIR/description missing)' if $desc eq '';
$self->{description} = $desc;
}
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index a2c2a4a..7bd2973 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -54,10 +54,10 @@ sub call {
my $method = $env->{REQUEST_METHOD};
if ($method eq 'POST') {
- if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
- my $path = $2;
+ if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) {
+ my ($part, $path) = ($2, $3);
return invalid_inbox($ctx, $1) ||
- serve_git($ctx, $path);
+ serve_git($ctx, $part, $path);
} elsif ($path_info =~ m!$INBOX_RE/!o) {
return invalid_inbox($ctx, $1) || mbox_results($ctx);
}
@@ -77,10 +77,10 @@ sub call {
invalid_inbox($ctx, $1) || get_atom($ctx);
} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
invalid_inbox($ctx, $1) || get_new($ctx);
- } elsif ($path_info =~ m!$INBOX_RE/
+ } elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?
($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
- my $path = $2;
- invalid_inbox($ctx, $1) || serve_git($ctx, $path);
+ my ($part, $path) = ($2, $3);
+ invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path);
} elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) {
serve_mbox_range($ctx, $1, $2);
} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
@@ -393,8 +393,11 @@ sub msg_page {
}
sub serve_git {
- my ($ctx, $path) = @_;
- PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path);
+ my ($ctx, $part, $path) = @_;
+ my $env = $ctx->{env};
+ my $ibx = $ctx->{-inbox};
+ my $git = defined $part ? $ibx->git_part($part) : $ibx->git;
+ $git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
}
sub mbox_results {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 0551998..7631754 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -72,17 +72,32 @@ sub _html_end {
my $obj = $ctx->{-inbox};
my $desc = ascii_html($obj->description);
+ my (%seen, @urls);
my $http = $obj->base_url($ctx->{env});
- chop $http;
- my %seen = ( $http => 1 );
- my @urls = ($http);
+ chop $http; # no trailing slash
+ my $part = $obj->max_git_part;
+ if (defined($part)) { # v2
+ # most recent partition first:
+ for (; $part >= 0; $part--) {
+ my $url = "$http/$part";
+ $seen{$url} = 1;
+ push @urls, $url;
+ }
+ } else { # v1
+ $seen{$http} = 1;
+ push @urls, $http;
+ }
+
+ # FIXME: partitioning can be different in other repositories,
+ # use the "cloneurl" file as-is for now:
foreach my $u (@{$obj->cloneurl}) {
next if $seen{$u};
$seen{$u} = 1;
push @urls, $u =~ /\Ahttps?:/ ? qq(<a\nhref="$u">$u</a>) : $u;
}
+
if (scalar(@urls) == 1) {
- $urls .= " git clone --mirror $http";
+ $urls .= " git clone --mirror $urls[0]";
} else {
$urls .= "\n" .
join("\n", map { "\tgit clone --mirror $_" } @urls);
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 2a798d6..9964b47 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -165,6 +165,11 @@ test_psgi(sub { $www->call(@_) }, sub {
$res = $cb->(GET('/v2test/reuse@mid/t/'));
$raw = $res->content;
like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
+
+ $res = $cb->(GET('/v2test/0/info/refs'));
+ is($res->code, 200, 'got info refs for dumb clones');
+ $res = $cb->(GET('/v2test/info/refs'));
+ is($res->code, 404, 'unpartitioned git URL fails');
});
done_testing();
diff --git a/t/view.t b/t/view.t
index 22f5c7e..8ae4225 100644
--- a/t/view.t
+++ b/t/view.t
@@ -16,6 +16,7 @@ my $ctx = {
base_url => sub { 'http://example.com/' },
cloneurl => sub {[]},
nntp_url => sub {[]},
+ max_git_part => sub { undef },
description => sub { '' }),
};
$ctx->{-inbox}->{-primary_address} = 'test@example.com';
--
EW
prev parent reply other threads:[~2018-03-27 21:27 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-27 21:26 [PATCH 0/3] http cloning for v2 + bugfixes Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:26 ` [PATCH 1/3] http: fix modification of read-only value Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:27 ` [PATCH 2/3] githttpbackend: avoid infinite loop on generic PSGI servers Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:27 ` Eric Wong (Contractor, The Linux Foundation) [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180327212701.11623-4-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).