unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] lei_mirror: more tweaks
@ 2023-02-12 23:18 Eric Wong
  2023-02-12 23:18 ` [PATCH 1/2] lei_mirror: further reduce `git config' calls Eric Wong
  2023-02-12 23:18 ` [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2023-02-12 23:18 UTC (permalink / raw)
  To: meta

The proposed-for-git `fetch.hideRefs' isn't supported yet;
I'm still testing to see if it's harmful for new clones
(I suspect so), and how to reduce its impact while still
being able to clone all kernel forks on kernel.org and
supporting RAM-constrained systems.
https://public-inbox.org/git/20230212090426.M558990@dcvr/
("fetch: support hideRefs to speed up connectivity checks")

Eric Wong (2):
  lei_mirror: further reduce `git config' calls
  lei_mirror: fetch most-recently-updated repos, first

 lib/PublicInbox/LeiMirror.pm | 80 ++++++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 31 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] lei_mirror: further reduce `git config' calls
  2023-02-12 23:18 [PATCH 0/2] lei_mirror: more tweaks Eric Wong
@ 2023-02-12 23:18 ` Eric Wong
  2023-02-12 23:18 ` [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-02-12 23:18 UTC (permalink / raw)
  To: meta

We can parse the config at once and avoid clobbering variables
which do not need changing.  We'll also do some prep work for
the fetch.hideRefs proposal being discussed at
<https://public-inbox.org/git/20230209122857.M669733@dcvr/>
---
 lib/PublicInbox/LeiMirror.pm | 62 +++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index d959b6b6..dd6356bb 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -401,23 +401,42 @@ sub fgrp_fetch_all {
 	my $opt = {};
 	my @fetch = do {
 		local $self->{lei}->{opt}->{jobs} = 1;
-		(fetch_args($self->{lei}, $opt),
-			qw(--no-tags --multiple));
+		(fetch_args($self->{lei}, $opt), qw(--no-tags --multiple));
 	};
 	push(@fetch, "-j$j") if $j;
 	while (my ($osdir, $fgrpv) = each %$todo) {
 		my $f = "$osdir/config";
 		return if !keep_going($self);
 
+		my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ];
 		# clobber group from previous run atomically
-		my $cmd = ['git', "--git-dir=$osdir", qw(config -f),
-				$f, '--unset-all', "remotes.$grp"];
-		$self->{lei}->qerr("# @$cmd");
-		if (!$self->{dry_run}) {
-			my $pid = spawn($cmd, undef, { 2 => $self->{lei}->{2} });
+		for ("remotes.$grp") { # TODO: hideRefs
+			my $c = [ @$cmd, '--unset-all', $_ ];
+			$self->{lei}->qerr("# @$c");
+			next if $self->{dry_run};
+			my $pid = spawn($c, undef, $opt);
 			waitpid($pid, 0) // die "waitpid: $!";
-			die "E: @$cmd: \$?=$?" if ($? && ($? >> 8) != 5);
+			die "E: @$c \$?=$?" if ($? && ($? >> 8) != 5);
+		}
 
+		# permanent configs:
+		my $cfg = PublicInbox::Config->git_config_dump($f);
+		for my $fgrp (@$fgrpv) {
+			my $u = $fgrp->{-uri} // die 'BUG: no {-uri}';
+			my $rn = $fgrp->{-remote} // die 'BUG: no {-remote}';
+			for ("url=$u", "fetch=+refs/*:refs/remotes/$rn/*",
+					'tagopt=--no-tags') {
+				my ($k, $v) = split(/=/, $_, 2);
+				$k = "remote.$rn.$k";
+				next if ($cfg->{$k} // '') eq $v;
+				my $c = [@$cmd, $k, $v];
+				$fgrp->{lei}->qerr("# @$c");
+				next if $fgrp->{dry_run};
+				run_die($c, undef, $opt);
+			}
+		}
+
+		if (!$self->{dry_run}) {
 			# update the config atomically via O_APPEND while
 			# respecting git-config locking
 			sysopen(my $lk, "$f.lock", O_CREAT|O_EXCL|O_WRONLY)
@@ -430,7 +449,6 @@ sub fgrp_fetch_all {
 			close $fh or die "close($f): $!";
 			unlink("$f.lock") or die "unlink($f.lock): $!";
 		}
-
 		$cmd = [ @git, "--git-dir=$osdir", @fetch, $grp ];
 		my $end = PublicInbox::OnDestroy->new($$, \&fgrpv_done, $fgrpv);
 		start_cmd($self, $cmd, $opt, $end);
@@ -446,12 +464,15 @@ sub forkgroup_prep {
 	my $dir = "$os/$fg.git";
 	if (!-d $dir && !$self->{dry_run}) {
 		PublicInbox::Import::init_bare($dir);
-		my @cmd = ('git', "--git-dir=$dir", 'config');
-		my $opt = { 2 => $self->{lei}->{2} };
-		for ('repack.useDeltaIslands=true',
-				'pack.island=refs/remotes/([^/]+)/') {
-			run_die([@cmd, split(/=/, $_, 2)], undef, $opt);
-		}
+		my $f = "$dir/config";
+		open my $fh, '+>>', $f or die "open:($f): $!";
+		print $fh <<EOM or die "print($f): $!";
+[repack]
+	useDeltaIslands = true
+[pack]
+	island = refs/remotes/([^/]+)/
+EOM
+		close $fh or die "close($f): $!";
 	}
 	my $key = $self->{-key} // die 'BUG: no -key';
 	my $rn = substr(sha256_hex($key), 0, 16);
@@ -546,17 +567,6 @@ sub resume_fetch {
 sub fgrp_enqueue {
 	my ($fgrp, $end) = @_; # $end calls fgrp_fetch_all
 	return if !keep_going($fgrp);
-	my $opt = { 2 => $fgrp->{lei}->{2} };
-	# --no-tags is required to avoid conflicts
-	my $u = $fgrp->{-uri} // die 'BUG: no {-uri}';
-	my $rn = $fgrp->{-remote} // die 'BUG: no {-remote}';
-	my @cmd = ('git', "--git-dir=$fgrp->{-osdir}", 'config');
-	for ("url=$u", "fetch=+refs/*:refs/remotes/$rn/*", 'tagopt=--no-tags') {
-		my @kv = split(/=/, $_, 2);
-		$kv[0] = "remote.$rn.$kv[0]";
-		$fgrp->{dry_run} ? $fgrp->{lei}->qerr("# @cmd @kv") :
-				run_die([@cmd, @kv], undef, $opt);
-	}
 	++$fgrp->{chg}->{nr_chg};
 	push @{$FGRP_TODO->{$fgrp->{-osdir}}}, $fgrp;
 }

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first
  2023-02-12 23:18 [PATCH 0/2] lei_mirror: more tweaks Eric Wong
  2023-02-12 23:18 ` [PATCH 1/2] lei_mirror: further reduce `git config' calls Eric Wong
@ 2023-02-12 23:18 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2023-02-12 23:18 UTC (permalink / raw)
  To: meta

Within the same forkgroup, we can assume the most recently updated
repo has the most data, so fetch those, first.  We'll save new clones
for last since we can preserve {reference} ordering for them.
---
 lib/PublicInbox/LeiMirror.pm | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index dd6356bb..4dedac9b 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -23,7 +23,7 @@ use PublicInbox::SHA qw(sha256_hex sha1_hex);
 use POSIX qw(strftime);
 
 our $LIVE; # pid => callback
-our $FGRP_TODO; # objstore -> [ fgrp mirror objects ]
+our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]]
 our $TODO; # reference => [ non-fgrp mirror objects ]
 our @PUH; # post-update hooks
 
@@ -404,9 +404,12 @@ sub fgrp_fetch_all {
 		(fetch_args($self->{lei}, $opt), qw(--no-tags --multiple));
 	};
 	push(@fetch, "-j$j") if $j;
-	while (my ($osdir, $fgrpv) = each %$todo) {
+	while (my ($osdir, $fgrp_old_new) = each %$todo) {
 		my $f = "$osdir/config";
 		return if !keep_going($self);
+		my ($fgrpv, $new) = @$fgrp_old_new;
+		@$fgrpv = sort { $b->{-sort} <=> $a->{-sort} } @$fgrpv;
+		push @$fgrpv, @$new; # $new is ordered by references
 
 		my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ];
 		# clobber group from previous run atomically
@@ -568,7 +571,8 @@ sub fgrp_enqueue {
 	my ($fgrp, $end) = @_; # $end calls fgrp_fetch_all
 	return if !keep_going($fgrp);
 	++$fgrp->{chg}->{nr_chg};
-	push @{$FGRP_TODO->{$fgrp->{-osdir}}}, $fgrp;
+	my $dst = $FGRP_TODO->{$fgrp->{-osdir}} //= [ [], [] ]; # [ old, new ]
+	push @{$dst->[defined($fgrp->{-sort}) ? 0 : 1]}, $fgrp;
 }
 
 sub clone_v1 {
@@ -586,8 +590,12 @@ sub clone_v1 {
 	my $resume = -d $dst;
 	if (my $fgrp = forkgroup_prep($self, $uri)) {
 		$fgrp->{-fini} = $fini;
-		$resume ? cmp_fp_do($fgrp, \&fgrp_enqueue, $end)
-			: fgrp_enqueue($fgrp, $end);
+		if ($resume) {
+			$fgrp->{-sort} = $fgrp->{-ent}->{modified};
+			cmp_fp_do($fgrp, \&fgrp_enqueue, $end);
+		} else { # new repo, save for last
+			fgrp_enqueue($fgrp, $end);
+		}
 	} elsif ($resume) {
 		cmp_fp_do($self, \&resume_fetch, $uri, $fini);
 	} else { # normal clone

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-02-12 23:19 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-12 23:18 [PATCH 0/2] lei_mirror: more tweaks Eric Wong
2023-02-12 23:18 ` [PATCH 1/2] lei_mirror: further reduce `git config' calls Eric Wong
2023-02-12 23:18 ` [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).