unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: "Eric Wong (Contractor, The Linux Foundation)" <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] www: support cloning individual v2 git partitions
Date: Tue, 27 Mar 2018 21:27:01 +0000	[thread overview]
Message-ID: <20180327212701.11623-4-e@80x24.org> (raw)
In-Reply-To: <20180327212701.11623-1-e@80x24.org>

This will require multiple client invocations, but should reduce
load on the server and make it easier for readers to only clone
the latest data.

Unfortunately, supporting a cloneurl file for externally-hosted
repos will be more difficult as we cannot easily know if the
clones use v1 or v2 repositories, or how many git partitions
they have.
---
 lib/PublicInbox/Inbox.pm     | 37 ++++++++++++++++++++++++++++++++++++-
 lib/PublicInbox/WWW.pm       | 19 +++++++++++--------
 lib/PublicInbox/WwwStream.pm | 23 +++++++++++++++++++----
 t/psgi_v2.t                  |  5 +++++
 t/view.t                     |  1 +
 5 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index b1ea8dc..3097751 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -82,6 +82,18 @@ sub new {
 	bless $opts, $class;
 }
 
+sub git_part {
+	my ($self, $part) = @_;
+	($self->{version} || 1) == 2 or return;
+	$self->{"$part.git"} ||= eval {
+		my $git_dir = "$self->{mainrepo}/git/$part.git";
+		my $g = PublicInbox::Git->new($git_dir);
+		$g->{-httpbackend_limiter} = $self->{-httpbackend_limiter};
+		# no cleanup needed, we never cat-file off this, only clone
+		$g;
+	};
+}
+
 sub git {
 	my ($self) = @_;
 	$self->{git} ||= eval {
@@ -94,6 +106,29 @@ sub git {
 	};
 }
 
+sub max_git_part {
+	my ($self) = @_;
+	my $v = $self->{version};
+	return unless defined($v) && $v == 2;
+	my $part = $self->{-max_git_part};
+	my $changed = git($self)->alternates_changed;
+	if (!defined($part) || $changed) {
+		$self->git->cleanup if $changed;
+		my $gits = "$self->{mainrepo}/git";
+		if (opendir my $dh, $gits) {
+			my $max = -1;
+			while (defined(my $git_dir = readdir($dh))) {
+				$git_dir =~ m!\A(\d+)\.git\z! or next;
+				$max = $1 if $1 > $max;
+			}
+			$part = $self->{-max_git_part} = $max if $max >= 0;
+		} else {
+			warn "opendir $gits failed: $!\n";
+		}
+	}
+	$part;
+}
+
 sub mm {
 	my ($self) = @_;
 	$self->{mm} ||= eval {
@@ -133,7 +168,7 @@ sub description {
 	local $/ = "\n";
 	chomp $desc;
 	$desc =~ s/\s+/ /smg;
-	$desc = '($GIT_DIR/description missing)' if $desc eq '';
+	$desc = '($REPO_DIR/description missing)' if $desc eq '';
 	$self->{description} = $desc;
 }
 
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index a2c2a4a..7bd2973 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -54,10 +54,10 @@ sub call {
 	my $method = $env->{REQUEST_METHOD};
 
 	if ($method eq 'POST') {
-		if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
-			my $path = $2;
+		if ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?(git-upload-pack)\z!) {
+			my ($part, $path) = ($2, $3);
 			return invalid_inbox($ctx, $1) ||
-				serve_git($ctx, $path);
+				serve_git($ctx, $part, $path);
 		} elsif ($path_info =~ m!$INBOX_RE/!o) {
 			return invalid_inbox($ctx, $1) || mbox_results($ctx);
 		}
@@ -77,10 +77,10 @@ sub call {
 		invalid_inbox($ctx, $1) || get_atom($ctx);
 	} elsif ($path_info =~ m!$INBOX_RE/new\.html\z!o) {
 		invalid_inbox($ctx, $1) || get_new($ctx);
-	} elsif ($path_info =~ m!$INBOX_RE/
+	} elsif ($path_info =~ m!$INBOX_RE/(?:(\d+)/)?
 				($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
-		my $path = $2;
-		invalid_inbox($ctx, $1) || serve_git($ctx, $path);
+		my ($part, $path) = ($2, $3);
+		invalid_inbox($ctx, $1) || serve_git($ctx, $part, $path);
 	} elsif ($path_info =~ m!$INBOX_RE/([\w-]+).mbox\.gz\z!o) {
 		serve_mbox_range($ctx, $1, $2);
 	} elsif ($path_info =~ m!$INBOX_RE/$MID_RE/$END_RE\z!o) {
@@ -393,8 +393,11 @@ sub msg_page {
 }
 
 sub serve_git {
-	my ($ctx, $path) = @_;
-	PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path);
+	my ($ctx, $part, $path) = @_;
+	my $env = $ctx->{env};
+	my $ibx = $ctx->{-inbox};
+	my $git = defined $part ? $ibx->git_part($part) : $ibx->git;
+	$git ? PublicInbox::GitHTTPBackend::serve($env, $git, $path) : r404();
 }
 
 sub mbox_results {
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index 0551998..7631754 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -72,17 +72,32 @@ sub _html_end {
 	my $obj = $ctx->{-inbox};
 	my $desc = ascii_html($obj->description);
 
+	my (%seen, @urls);
 	my $http = $obj->base_url($ctx->{env});
-	chop $http;
-	my %seen = ( $http => 1 );
-	my @urls = ($http);
+	chop $http; # no trailing slash
+	my $part = $obj->max_git_part;
+	if (defined($part)) { # v2
+		# most recent partition first:
+		for (; $part >= 0; $part--) {
+			my $url = "$http/$part";
+			$seen{$url} = 1;
+			push @urls, $url;
+		}
+	} else { # v1
+		$seen{$http} = 1;
+		push @urls, $http;
+	}
+
+	# FIXME: partitioning can be different in other repositories,
+	# use the "cloneurl" file as-is for now:
 	foreach my $u (@{$obj->cloneurl}) {
 		next if $seen{$u};
 		$seen{$u} = 1;
 		push @urls, $u =~ /\Ahttps?:/ ? qq(<a\nhref="$u">$u</a>) : $u;
 	}
+
 	if (scalar(@urls) == 1) {
-		$urls .= " git clone --mirror $http";
+		$urls .= " git clone --mirror $urls[0]";
 	} else {
 		$urls .= "\n" .
 			join("\n", map { "\tgit clone --mirror $_" } @urls);
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 2a798d6..9964b47 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -165,6 +165,11 @@ test_psgi(sub { $www->call(@_) }, sub {
 	$res = $cb->(GET('/v2test/reuse@mid/t/'));
 	$raw = $res->content;
 	like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
+
+	$res = $cb->(GET('/v2test/0/info/refs'));
+	is($res->code, 200, 'got info refs for dumb clones');
+	$res = $cb->(GET('/v2test/info/refs'));
+	is($res->code, 404, 'unpartitioned git URL fails');
 });
 
 done_testing();
diff --git a/t/view.t b/t/view.t
index 22f5c7e..8ae4225 100644
--- a/t/view.t
+++ b/t/view.t
@@ -16,6 +16,7 @@ my $ctx = {
 		base_url => sub { 'http://example.com/' },
 		cloneurl => sub {[]},
 		nntp_url => sub {[]},
+		max_git_part => sub { undef },
 		description => sub { '' }),
 };
 $ctx->{-inbox}->{-primary_address} = 'test@example.com';
-- 
EW


      parent reply	other threads:[~2018-03-27 21:27 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-27 21:26 [PATCH 0/3] http cloning for v2 + bugfixes Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:26 ` [PATCH 1/3] http: fix modification of read-only value Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:27 ` [PATCH 2/3] githttpbackend: avoid infinite loop on generic PSGI servers Eric Wong (Contractor, The Linux Foundation)
2018-03-27 21:27 ` Eric Wong (Contractor, The Linux Foundation) [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180327212701.11623-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).