* [PATCH] fetch: fix half-baked v1 manifest.js.gz handling
@ 2021-09-12 9:12 Eric Wong
2021-09-12 10:53 ` [PATCH 2/1] fetch: fix and test v2 epoch detection Eric Wong
0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2021-09-12 9:12 UTC (permalink / raw)
To: meta
The v1 code path was totally half-baked after the change
to use manifest.js.gz :x
Fixes: ffb7fbda6869db4b ("fetch: use manifest.js.gz for v1")
---
lib/PublicInbox/Fetch.pm | 15 ++++++++-------
lib/PublicInbox/LeiMirror.pm | 18 +++++++++---------
t/lei-mirror.t | 8 ++++++++
3 files changed, 25 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 9613a582..2c6ae86a 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -77,9 +77,9 @@ sub do_manifest ($$$) {
my $t1 = $cur->{modified} // next;
delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1;
}
- my ($path_pfx, $v1_bare, @v2_epochs) =
+ my (undef, $v1_path, @v2_epochs) =
PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path);
- [ 200, $path_pfx, $v1_bare, \@v2_epochs, $muri, $ft, $mf ];
+ [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf ];
}
sub do_fetch {
@@ -117,7 +117,7 @@ EOM
}
$lei->qerr("# inbox URL: $ibx_uri/");
my $res = do_manifest($lei, $dir, $ibx_uri) or return;
- my ($code, $path_pfx, $v1_bare, $v2_epochs, $muri, $ft, $mf) = @$res;
+ my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf) = @$res;
return if $code == 304;
if ($code == 404) {
# any pre-manifest.js.gz instances running? Just fetch all
@@ -130,14 +130,13 @@ EOM
$code == 200 or die "BUG unexpected code $code\n";
}
if ($ibx_ver == 2) {
- defined($v1_bare) and warn <<EOM;
-E: got v1 `$v1_bare' when expecting v2 epoch(s) in <$muri>, WTF?
+ defined($v1_path) and warn <<EOM;
+E: got v1 `$v1_path' when expecting v2 epoch(s) in <$muri>, WTF?
EOM
@git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b }
map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @$v2_epochs;
} else {
- $v1_bare eq $dir or warn "$v1_bare != $dir";
- $git_dir[0] = $v1_bare // $dir;
+ $git_dir[0] = $dir;
}
# n.b. this expects all epochs are from the same host
my $torsocks = $lei->{curl}->torsocks($lei, $muri);
@@ -150,6 +149,8 @@ EOM
} else {
my $e_uri = $ibx_uri->clone;
my ($epath) = ($d =~ m!/(git/[0-9]+\.git)\z!);
+ defined($epath) or
+ die "BUG: $d is not an epoch to clone\n";
$e_uri->path($ibx_uri->path.$epath);
$cmd = [ @$torsocks,
PublicInbox::LeiMirror::clone_cmd($lei, $opt),
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 23813dcf..254848c9 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -238,15 +238,15 @@ failed to extract epoch number from $src
# PSGI mount prefixes and manifest.js.gz prefixes don't always align...
sub deduce_epochs ($$) {
my ($m, $path) = @_;
- my ($v1_bare, @v2_epochs);
+ my ($v1_ent, @v2_epochs);
my $path_pfx = '';
$path =~ s!/+\z!!;
do {
- $v1_bare = $m->{$path};
+ $v1_ent = $m->{$path};
@v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
- } while (!defined($v1_bare) && !@v2_epochs &&
+ } while (!defined($v1_ent) && !@v2_epochs &&
$path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1);
- ($path_pfx, $v1_bare, @v2_epochs);
+ ($path_pfx, $v1_ent ? $path : undef, @v2_epochs);
}
sub decode_manifest ($$$) {
@@ -282,20 +282,20 @@ sub try_manifest {
return $lei->child_error($cerr, "@$cmd failed");
}
my $m = decode_manifest($ft, $fn, $uri);
- my ($path_pfx, $v1_bare, @v2_epochs) = deduce_epochs($m, $path);
+ my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path);
if (@v2_epochs) {
# It may be possible to have v1 + v2 in parallel someday:
- $lei->err(<<EOM) if defined $v1_bare;
-# `$v1_bare' appears to be a v1 inbox while v2 epochs exist:
+ $lei->err(<<EOM) if defined $v1_path;
+# `$v1_path' appears to be a v1 inbox while v2 epochs exist:
# @v2_epochs
-# ignoring $v1_bare (use --inbox-version=1 to force v1 instead)
+# ignoring $v1_path (use --inbox-version=1 to force v1 instead)
EOM
@v2_epochs = map {
$uri->path($path_pfx.$_);
$uri->clone
} @v2_epochs;
clone_v2($self, \@v2_epochs);
- } elsif (defined $v1_bare) {
+ } elsif (defined $v1_path) {
clone_v1($self);
} else {
die "E: confused by <$uri>, possible matches:\n\t",
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 35b77cf7..7db49e15 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -110,6 +110,14 @@ SKIP: {
ok(!-e "$d/t1/mirror.done", 'no leftover file');
ok(run_script([qw(-fetch -q -C), "$d/t1"], undef, $opt),
'fetching v1 works');
+ unlink("$d/t1/manifest.js.gz") or xbail "unlink $!";
+ my $before = [ glob("$d/t1/*") ];
+ ok(run_script([qw(-fetch -q -C), "$d/t1"], undef, $opt),
+ 'fetching v1 works w/o manifest.js.gz');
+ unlink("$d/t1/FETCH_HEAD"); # git internal
+ ok(unlink("$d/t1/manifest.js.gz"), 'manifest created');
+ my $after = [ glob("$d/t1/*") ];
+ is_deeply($before, $after, 'no new files created');
}
ok($td->kill, 'killed -httpd');
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH 2/1] fetch: fix and test v2 epoch detection
2021-09-12 9:12 [PATCH] fetch: fix half-baked v1 manifest.js.gz handling Eric Wong
@ 2021-09-12 10:53 ` Eric Wong
0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2021-09-12 10:53 UTC (permalink / raw)
To: meta
v2 epoch detection was also totally broken by the change to use
manifest.js.gz for v1 :x
Fixes: ffb7fbda6869db4b ("fetch: use manifest.js.gz for v1")
---
lib/PublicInbox/Fetch.pm | 2 +-
t/v2mirror.t | 31 ++++++++++++++++---------------
2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 2c6ae86a..4d501108 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -148,7 +148,7 @@ EOM
$cmd = [ @$torsocks, fetch_cmd($lei, $opt) ];
} else {
my $e_uri = $ibx_uri->clone;
- my ($epath) = ($d =~ m!/(git/[0-9]+\.git)\z!);
+ my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!);
defined($epath) or
die "BUG: $d is not an epoch to clone\n";
$e_uri->path($ibx_uri->path.$epath);
diff --git a/t/v2mirror.t b/t/v2mirror.t
index 012e5bd2..b0075fcc 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -65,20 +65,16 @@ $v2w->done;
$ibx->cleanup;
my $sock = tcp_server();
-my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ];
-my $td = start_script($cmd, undef, { 3 => $sock });
+my @cmd = ('-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err");
+my $td = start_script(\@cmd, undef, { 3 => $sock });
my ($host, $port) = tcp_host_port($sock);
$sock = undef;
-my @cmd;
-foreach my $i (0..$epoch_max) {
- my $sfx = $i == 0 ? '.git' : '';
- @cmd = (qw(git clone --mirror -q),
- "http://$host:$port/v2/$i$sfx",
- "$tmpdir/m/git/$i.git");
+@cmd = (qw(-clone -q), "http://$host:$port/v2/", "$tmpdir/m");
+run_script(\@cmd) or xbail '-clone';
- is(xsys(@cmd), 0, "cloned $i.git");
- ok(-d "$tmpdir/m/git/$i.git", "mirror $i OK");
+for my $i (0..$epoch_max) {
+ ok(-d "$tmpdir/m/git/$i.git", "epoch $i cloned");
}
@cmd = ("-init", '-j1', '-V2', 'm', "$tmpdir/m", 'http://example.com/m',
@@ -93,7 +89,6 @@ my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' };
$mibx = PublicInbox::Inbox->new($mibx);
is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax');
-$v2w->{rotate_bytes} = $old_rotate_bytes;
for my $i (10..15) {
$mime->header_set('Message-ID', "<$i\@example.com>");
$mime->header_set('Subject', "subject = $i");
@@ -102,12 +97,17 @@ for my $i (10..15) {
$v2w->done;
$ibx->cleanup;
+my @new_epochs;
my $fetch_each_epoch = sub {
- foreach my $i (0..$epoch_max) {
- my $dir = "$tmpdir/m/git/$i.git";
- is(xsys('git', "--git-dir=$dir", 'fetch', '-q'), 0,
- 'fetch successful');
+ my $mf = "$tmpdir/m/manifest.js.gz";
+ if (my @st = stat($mf)) {
+ utime($st[8], $st[9] - 1, $mf) or xbail "utime $mf: $!";
}
+ my %before = map { $_ => 1 } glob("$tmpdir/m/git/*");
+ run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m"}) or
+ xbail '-fetch fail';
+ my @after = grep { !$before{$_} } glob("$tmpdir/m/git/*");
+ push @new_epochs, @after;
};
$fetch_each_epoch->();
@@ -233,6 +233,7 @@ EOF
$mset = $mibx->search->reopen->mset('m:2big@a');
is(scalar($mset->items), 0, 'large message not re-indexed');
}
+ok(scalar(@new_epochs), 'new epochs were created and fetched');
ok($td->kill, 'killed httpd');
$td->join;
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-09-12 10:53 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-12 9:12 [PATCH] fetch: fix half-baked v1 manifest.js.gz handling Eric Wong
2021-09-12 10:53 ` [PATCH 2/1] fetch: fix and test v2 epoch detection Eric Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).