unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] fetch: fix half-baked v1 manifest.js.gz handling
@ 2021-09-12  9:12 Eric Wong
  2021-09-12 10:53 ` [PATCH 2/1] fetch: fix and test v2 epoch detection Eric Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Eric Wong @ 2021-09-12  9:12 UTC (permalink / raw)
  To: meta

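The v1 code path was totally half-baked after the change
to use manifest.js.gz :x

deduce_epochs returned the manifest entry for a v1 inbox rather
than its path, and do_fetch then used that value as the git
directory.  Return the matched path from deduce_epochs instead,
and always use $dir as the git directory when fetching v1.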

Fixes: ffb7fbda6869db4b ("fetch: use manifest.js.gz for v1")
---
 lib/PublicInbox/Fetch.pm     | 15 ++++++++-------
 lib/PublicInbox/LeiMirror.pm | 18 +++++++++---------
 t/lei-mirror.t               |  8 ++++++++
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 9613a582..2c6ae86a 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -77,9 +77,9 @@ sub do_manifest ($$$) {
 		my $t1 = $cur->{modified} // next;
 		delete($mdiff->{$k}) if $f0 eq $f1 && $t0 == $t1;
 	}
-	my ($path_pfx, $v1_bare, @v2_epochs) =
+	my (undef, $v1_path, @v2_epochs) =
 		PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path);
-	[ 200, $path_pfx, $v1_bare, \@v2_epochs, $muri, $ft, $mf ];
+	[ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf ];
 }
 
 sub do_fetch {
@@ -117,7 +117,7 @@ EOM
 	}
 	$lei->qerr("# inbox URL: $ibx_uri/");
 	my $res = do_manifest($lei, $dir, $ibx_uri) or return;
-	my ($code, $path_pfx, $v1_bare, $v2_epochs, $muri, $ft, $mf) = @$res;
+	my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf) = @$res;
 	return if $code == 304;
 	if ($code == 404) {
 		# any pre-manifest.js.gz instances running? Just fetch all
@@ -130,14 +130,13 @@ EOM
 		$code == 200 or die "BUG unexpected code $code\n";
 	}
 	if ($ibx_ver == 2) {
-		defined($v1_bare) and warn <<EOM;
-E: got v1 `$v1_bare' when expecting v2 epoch(s) in <$muri>, WTF?
+		defined($v1_path) and warn <<EOM;
+E: got v1 `$v1_path' when expecting v2 epoch(s) in <$muri>, WTF?
 EOM
 		@git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b }
 			map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @$v2_epochs;
 	} else {
-		$v1_bare eq $dir or warn "$v1_bare != $dir";
-		$git_dir[0] = $v1_bare // $dir;
+		$git_dir[0] = $dir;
 	}
 	# n.b. this expects all epochs are from the same host
 	my $torsocks = $lei->{curl}->torsocks($lei, $muri);
@@ -150,6 +149,8 @@ EOM
 		} else {
 			my $e_uri = $ibx_uri->clone;
 			my ($epath) = ($d =~ m!/(git/[0-9]+\.git)\z!);
+			defined($epath) or
+				die "BUG: $d is not an epoch to clone\n";
 			$e_uri->path($ibx_uri->path.$epath);
 			$cmd = [ @$torsocks,
 				PublicInbox::LeiMirror::clone_cmd($lei, $opt),
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 23813dcf..254848c9 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -238,15 +238,15 @@ failed to extract epoch number from $src
 # PSGI mount prefixes and manifest.js.gz prefixes don't always align...
 sub deduce_epochs ($$) {
 	my ($m, $path) = @_;
-	my ($v1_bare, @v2_epochs);
+	my ($v1_ent, @v2_epochs);
 	my $path_pfx = '';
 	$path =~ s!/+\z!!;
 	do {
-		$v1_bare = $m->{$path};
+		$v1_ent = $m->{$path};
 		@v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
-	} while (!defined($v1_bare) && !@v2_epochs &&
+	} while (!defined($v1_ent) && !@v2_epochs &&
 		$path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1);
-	($path_pfx, $v1_bare, @v2_epochs);
+	($path_pfx, $v1_ent ? $path : undef, @v2_epochs);
 }
 
 sub decode_manifest ($$$) {
@@ -282,20 +282,20 @@ sub try_manifest {
 		return $lei->child_error($cerr, "@$cmd failed");
 	}
 	my $m = decode_manifest($ft, $fn, $uri);
-	my ($path_pfx, $v1_bare, @v2_epochs) = deduce_epochs($m, $path);
+	my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path);
 	if (@v2_epochs) {
 		# It may be possible to have v1 + v2 in parallel someday:
-		$lei->err(<<EOM) if defined $v1_bare;
-# `$v1_bare' appears to be a v1 inbox while v2 epochs exist:
+		$lei->err(<<EOM) if defined $v1_path;
+# `$v1_path' appears to be a v1 inbox while v2 epochs exist:
 # @v2_epochs
-# ignoring $v1_bare (use --inbox-version=1 to force v1 instead)
+# ignoring $v1_path (use --inbox-version=1 to force v1 instead)
 EOM
 		@v2_epochs = map {
 			$uri->path($path_pfx.$_);
 			$uri->clone
 		} @v2_epochs;
 		clone_v2($self, \@v2_epochs);
-	} elsif (defined $v1_bare) {
+	} elsif (defined $v1_path) {
 		clone_v1($self);
 	} else {
 		die "E: confused by <$uri>, possible matches:\n\t",
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 35b77cf7..7db49e15 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -110,6 +110,14 @@ SKIP: {
 	ok(!-e "$d/t1/mirror.done", 'no leftover file');
 	ok(run_script([qw(-fetch -q -C), "$d/t1"], undef, $opt),
 		'fetching v1 works');
+	unlink("$d/t1/manifest.js.gz") or xbail "unlink $!";
+	my $before = [ glob("$d/t1/*") ];
+	ok(run_script([qw(-fetch -q -C), "$d/t1"], undef, $opt),
+		'fetching v1 works w/o manifest.js.gz');
+	unlink("$d/t1/FETCH_HEAD"); # git internal
+	ok(unlink("$d/t1/manifest.js.gz"), 'manifest created');
+	my $after = [ glob("$d/t1/*") ];
+	is_deeply($before, $after, 'no new files created');
 }
 
 ok($td->kill, 'killed -httpd');

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/1] fetch: fix and test v2 epoch detection
  2021-09-12  9:12 [PATCH] fetch: fix half-baked v1 manifest.js.gz handling Eric Wong
@ 2021-09-12 10:53 ` Eric Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Eric Wong @ 2021-09-12 10:53 UTC (permalink / raw)
  To: meta

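v2 epoch detection was also totally broken by the change to use
manifest.js.gz for v1 :x

The regexp matching an epoch to clone captured the epoch path
without its leading slash before appending it to the inbox URI
path; capture the leading slash as well.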

Fixes: ffb7fbda6869db4b ("fetch: use manifest.js.gz for v1")
---
 lib/PublicInbox/Fetch.pm |  2 +-
 t/v2mirror.t             | 31 ++++++++++++++++---------------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 2c6ae86a..4d501108 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -148,7 +148,7 @@ EOM
 			$cmd = [ @$torsocks, fetch_cmd($lei, $opt) ];
 		} else {
 			my $e_uri = $ibx_uri->clone;
-			my ($epath) = ($d =~ m!/(git/[0-9]+\.git)\z!);
+			my ($epath) = ($d =~ m!(/git/[0-9]+\.git)\z!);
 			defined($epath) or
 				die "BUG: $d is not an epoch to clone\n";
 			$e_uri->path($ibx_uri->path.$epath);
diff --git a/t/v2mirror.t b/t/v2mirror.t
index 012e5bd2..b0075fcc 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -65,20 +65,16 @@ $v2w->done;
 $ibx->cleanup;
 
 my $sock = tcp_server();
-my $cmd = [ '-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ];
-my $td = start_script($cmd, undef, { 3 => $sock });
+my @cmd = ('-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err");
+my $td = start_script(\@cmd, undef, { 3 => $sock });
 my ($host, $port) = tcp_host_port($sock);
 $sock = undef;
 
-my @cmd;
-foreach my $i (0..$epoch_max) {
-	my $sfx = $i == 0 ? '.git' : '';
-	@cmd = (qw(git clone --mirror -q),
-		"http://$host:$port/v2/$i$sfx",
-		"$tmpdir/m/git/$i.git");
+@cmd = (qw(-clone -q), "http://$host:$port/v2/", "$tmpdir/m");
+run_script(\@cmd) or xbail '-clone';
 
-	is(xsys(@cmd), 0, "cloned $i.git");
-	ok(-d "$tmpdir/m/git/$i.git", "mirror $i OK");
+for my $i (0..$epoch_max) {
+	ok(-d "$tmpdir/m/git/$i.git", "epoch $i cloned");
 }
 
 @cmd = ("-init", '-j1', '-V2', 'm', "$tmpdir/m", 'http://example.com/m',
@@ -93,7 +89,6 @@ my $mibx = { inboxdir => "$tmpdir/m", address => 'alt@example.com' };
 $mibx = PublicInbox::Inbox->new($mibx);
 is_deeply([$mibx->mm->minmax], [$ibx->mm->minmax], 'index synched minmax');
 
-$v2w->{rotate_bytes} = $old_rotate_bytes;
 for my $i (10..15) {
 	$mime->header_set('Message-ID', "<$i\@example.com>");
 	$mime->header_set('Subject', "subject = $i");
@@ -102,12 +97,17 @@ for my $i (10..15) {
 $v2w->done;
 $ibx->cleanup;
 
+my @new_epochs;
 my $fetch_each_epoch = sub {
-	foreach my $i (0..$epoch_max) {
-		my $dir = "$tmpdir/m/git/$i.git";
-		is(xsys('git', "--git-dir=$dir", 'fetch', '-q'), 0,
-			'fetch successful');
+	my $mf = "$tmpdir/m/manifest.js.gz";
+	if (my @st = stat($mf)) {
+		utime($st[8], $st[9] - 1, $mf) or xbail "utime $mf: $!";
 	}
+	my %before = map { $_ => 1 } glob("$tmpdir/m/git/*");
+	run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m"}) or
+		xbail '-fetch fail';
+	my @after = grep { !$before{$_} } glob("$tmpdir/m/git/*");
+	push @new_epochs, @after;
 };
 
 $fetch_each_epoch->();
@@ -233,6 +233,7 @@ EOF
 	$mset = $mibx->search->reopen->mset('m:2big@a');
 	is(scalar($mset->items), 0, 'large message not re-indexed');
 }
+ok(scalar(@new_epochs), 'new epochs were created and fetched');
 
 ok($td->kill, 'killed httpd');
 $td->join;

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-12 10:53 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-12  9:12 [PATCH] fetch: fix half-baked v1 manifest.js.gz handling Eric Wong
2021-09-12 10:53 ` [PATCH 2/1] fetch: fix and test v2 epoch detection Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).