unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first
Date: Sun, 12 Feb 2023 23:18:28 +0000	[thread overview]
Message-ID: <20230212231828.33336-3-e@80x24.org> (raw)
In-Reply-To: <20230212231828.33336-1-e@80x24.org>

Within the same forkgroup, we can assume the most recently updated
repos have the most data, so fetch those first.  We'll save new clones
for last since we can preserve {reference} ordering for them.
---
 lib/PublicInbox/LeiMirror.pm | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index dd6356bb..4dedac9b 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -23,7 +23,7 @@ use PublicInbox::SHA qw(sha256_hex sha1_hex);
 use POSIX qw(strftime);
 
 our $LIVE; # pid => callback
-our $FGRP_TODO; # objstore -> [ fgrp mirror objects ]
+our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]]
 our $TODO; # reference => [ non-fgrp mirror objects ]
 our @PUH; # post-update hooks
 
@@ -404,9 +404,12 @@ sub fgrp_fetch_all {
 		(fetch_args($self->{lei}, $opt), qw(--no-tags --multiple));
 	};
 	push(@fetch, "-j$j") if $j;
-	while (my ($osdir, $fgrpv) = each %$todo) {
+	while (my ($osdir, $fgrp_old_new) = each %$todo) {
 		my $f = "$osdir/config";
 		return if !keep_going($self);
+		my ($fgrpv, $new) = @$fgrp_old_new;
+		@$fgrpv = sort { $b->{-sort} <=> $a->{-sort} } @$fgrpv;
+		push @$fgrpv, @$new; # $new is ordered by references
 
 		my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ];
 		# clobber group from previous run atomically
@@ -568,7 +571,8 @@ sub fgrp_enqueue {
 	my ($fgrp, $end) = @_; # $end calls fgrp_fetch_all
 	return if !keep_going($fgrp);
 	++$fgrp->{chg}->{nr_chg};
-	push @{$FGRP_TODO->{$fgrp->{-osdir}}}, $fgrp;
+	my $dst = $FGRP_TODO->{$fgrp->{-osdir}} //= [ [], [] ]; # [ old, new ]
+	push @{$dst->[defined($fgrp->{-sort}) ? 0 : 1]}, $fgrp; # -sort => old
 }
 
 sub clone_v1 {
@@ -586,8 +590,12 @@ sub clone_v1 {
 	my $resume = -d $dst;
 	if (my $fgrp = forkgroup_prep($self, $uri)) {
 		$fgrp->{-fini} = $fini;
-		$resume ? cmp_fp_do($fgrp, \&fgrp_enqueue, $end)
-			: fgrp_enqueue($fgrp, $end);
+		if ($resume) {
+			$fgrp->{-sort} = $fgrp->{-ent}->{modified};
+			cmp_fp_do($fgrp, \&fgrp_enqueue, $end);
+		} else { # new repo, save for last
+			fgrp_enqueue($fgrp, $end);
+		}
 	} elsif ($resume) {
 		cmp_fp_do($self, \&resume_fetch, $uri, $fini);
 	} else { # normal clone

      parent reply	other threads:[~2023-02-12 23:19 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-12 23:18 [PATCH 0/2] lei_mirror: more tweaks Eric Wong
2023-02-12 23:18 ` [PATCH 1/2] lei_mirror: further reduce `git config' calls Eric Wong
2023-02-12 23:18 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230212231828.33336-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).