From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 4/5] clone|fetch|--mirror: cull manifest in partial mirrors
Date: Fri, 24 Sep 2021 10:56:44 +0000 [thread overview]
Message-ID: <20210924105645.8627-5-e@80x24.org> (raw)
In-Reply-To: <20210924105645.8627-1-e@80x24.org>
This makes it easier for users to enable fetching on a
previously read-only epoch. Epochs which are skipped because
they are read-only are now culled from the manifest.js.gz we
store locally. Prior to this change, users were required to
delete manifest.js.gz in addition to adding the writable bit.
Now, they just have to "chmod +w $EPOCH_DIR".
---
lib/PublicInbox/Fetch.pm | 17 +++++++++++++++--
lib/PublicInbox/LeiMirror.pm | 24 ++++++++++++++++++++----
t/v2mirror.t | 24 ++++++++++++++++++++++++
3 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 464ffe12..7f60b619 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -12,6 +12,8 @@ use PublicInbox::LEI;
use PublicInbox::LeiCurl;
use PublicInbox::LeiMirror;
use File::Temp ();
+use PublicInbox::Config;
+use IO::Compress::Gzip qw(gzip $GzipError);
sub new { bless {}, __PACKAGE__ }
@@ -81,7 +83,7 @@ sub do_manifest ($$$) {
}
my (undef, $v1_path, @v2_epochs) =
PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path);
- [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf ];
+ [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf, $m1 ];
}
sub get_fingerprint2 {
@@ -133,7 +135,7 @@ EOM
PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver);
$lei->qerr("# inbox URL: $ibx_uri/");
my $res = do_manifest($lei, $dir, $ibx_uri) or return;
- my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf) = @$res;
+ my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf, $m1) = @$res;
if ($code == 404) {
# any pre-manifest.js.gz instances running? Just fetch all
# existing ones and unconditionally try cloning the next
@@ -145,6 +147,7 @@ EOM
} else {
$code == 200 or die "BUG unexpected code $code\n";
}
+ my $mculled;
if ($ibx_ver == 2) {
defined($v1_path) and warn <<EOM;
E: got v1 `$v1_path' when expecting v2 epoch(s) in <$muri>, WTF?
@@ -153,6 +156,12 @@ EOM
my ($nr) = (m!/([0-9]+)\.git\z!g);
$skip->{$nr} ? () : $nr;
} @$v2_epochs;
+ if ($m1 && scalar keys %$skip) {
+ my $re = join('|', keys %$skip);
+ my @del = grep(m!/git/$re\.git\z!, keys %$m1);
+ delete @$m1{@del};
+ $mculled = 1;
+ }
} else {
$git_dir[0] = $dir;
}
@@ -193,6 +202,10 @@ EOM
for my $i (@new_epoch) { $mg->epoch_cfg_set($i) }
if ($ft) {
my $fn = $ft->filename;
+ if ($mculled) {
+ my $json = PublicInbox::Config->json->encode($m1);
+ gzip(\$json => $fn) or die "gzip: $GzipError";
+ }
rename($fn, $mf) or die "E: rename($fn, $mf): $!\n";
$ft->unlink_on_destroy(0);
}
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index fe81b967..1ab5e0d8 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -6,7 +6,9 @@ package PublicInbox::LeiMirror;
use strict;
use v5.10.1;
use parent qw(PublicInbox::IPC);
+use PublicInbox::Config;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use IO::Compress::Gzip qw(gzip $GzipError);
use PublicInbox::Spawn qw(popen_rd spawn run_die);
use File::Temp ();
use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY);
@@ -267,14 +269,14 @@ EOM
close $fh or die "close:($f): $!";
}
-sub clone_v2 ($$) {
- my ($self, $v2_epochs) = @_;
+sub clone_v2 ($$;$) {
+ my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
my $lei = $self->{lei};
my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
my $pfx = $curl->torsocks($lei, (values %$v2_epochs)[0]) or return;
my $dst = $self->{dst};
my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs);
- my (@src_edst, @read_only);
+ my (@src_edst, @read_only, @skip_nr);
for my $nr (sort { $a <=> $b } keys %$v2_epochs) {
my $uri = $v2_epochs->{$nr};
my $src = $uri->as_string;
@@ -289,8 +291,15 @@ failed to extract epoch number from $src
} else { # create a placeholder so users only need to chmod +w
init_placeholder($src, $edst);
push @read_only, $edst;
+ push @skip_nr, $nr;
}
}
+ if (@skip_nr) { # filter out the epochs we skipped
+ my $re = join('|', @skip_nr);
+ my @del = grep(m!/git/$re\.git\z!, keys %$m);
+ delete @$m{@del};
+ $self->{-culled_manifest} = 1;
+ }
my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
_try_config($self);
my $on_destroy = $lk->lock_for_scope($$);
@@ -379,13 +388,20 @@ EOM
my ($n) = ("$uri" =~ m!/([0-9]+)\.git\z!);
$n => $uri->clone
} @v2_epochs;
- clone_v2($self, \%v2_epochs);
+ clone_v2($self, \%v2_epochs, $m);
} elsif (defined $v1_path) {
clone_v1($self);
} else {
die "E: confused by <$uri>, possible matches:\n\t",
join(', ', sort keys %$m), "\n";
}
+ if (delete $self->{-culled_manifest}) { # set by clone_v2
+ # write the smaller manifest if epochs were skipped so
+ # users won't have to delete manifest if they +w an
+ # epoch they no longer want to skip
+ my $json = PublicInbox::Config->json->encode($m);
+ gzip(\$json => $fn) or die "gzip: $GzipError";
+ }
my $fin = "$self->{dst}/manifest.js.gz";
rename($fn, $fin) or die "E: rename($fn, $fin): $!";
$ft->unlink_on_destroy(0);
diff --git a/t/v2mirror.t b/t/v2mirror.t
index 1231b72d..fa4a717d 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -9,6 +9,7 @@ use PublicInbox::Spawn qw(which);
require_git(2.6);
require_cmd('curl');
local $ENV{HOME} = abs_path('t');
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
# Integration tests for HTTP cloning + mirroring
require_mods(qw(Plack::Util Plack::Builder
@@ -288,6 +289,29 @@ if ('test read-only epoch dirs') {
is_deeply(\@g2, \@g, 'cloned again');
is(scalar(grep { -w $_ } @g2), scalar(@w) + 1,
'got one more cloned epoch');
+
+ # make 0.git writable and fetch into it, relies on culled manifest
+ chmod(0755, $g2[0]) or xbail "chmod: $!";
+ my @before = glob("$g2[0]/objects/*/*");
+ run_script([qw(-fetch -q)], undef, { -C => $dst });
+ is($?, 0, 'no error from partial fetch');
+ my @after = glob("$g2[0]/objects/*/*");
+ ok(scalar(@before) < scalar(@after), 'fetched after chmod 0755 0.git');
+
+ # ensure culled manifest is maintained after fetch
+ gunzip("$dst/manifest.js.gz" => \(my $m), MultiStream => 1) or
+ xbail "gunzip: $GunzipError";
+ $m = PublicInbox::Config->json->decode($m);
+ for my $k (keys %$m) { # /$name/git/$N.git
+ my ($nr) = ($k =~ m!/git/([0-9]+)\.git\z!);
+ ok(-w "$dst/git/$nr.git", "writable $nr.git in manifest");
+ }
+ for my $ro (grep { !-w $_ } @g2) {
+ my ($nr) = ($ro =~ m!/git/([0-9]+)\.git\z!);
+ is(grep(m!/git/$nr\.git\z!, keys %$m), 0,
+ "read-only $nr.git not in manifest")
+ or xbail([sort keys %$m]);
+ }
}
my $err = '';
next prev parent reply other threads:[~2021-09-24 10:56 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-24 10:56 [PATCH 0/5] clone|fetch: flesh out partial mirror support Eric Wong
2021-09-24 10:56 ` [PATCH 1/5] clone|--mirror: support --epoch=RANGE for partial clones Eric Wong
2021-09-24 10:56 ` [PATCH 2/5] fetch: fix skipping with multi-epoch inboxes Eric Wong
2021-09-24 10:56 ` [PATCH 3/5] clone|--mirror: fix and test against pre-manifest WWW Eric Wong
2021-09-24 10:56 ` Eric Wong [this message]
2021-09-24 10:56 ` [PATCH 5/5] fetch: support v2 w/o manifest on old WWW Eric Wong
2021-09-25 3:21 ` [PATCH 6/5] t/v2mirror: check dependencies for legacy test Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210924105645.8627-5-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).