unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] v2: epoch reuse space calculation fix
@ 2024-12-17 21:27 Eric Wong
  2024-12-17 21:27 ` [PATCH 1/2] v2writable: simplify epoch directory generation Eric Wong
  2024-12-17 21:27 ` [PATCH 2/2] import: fix space calculation when reusing epochs Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2024-12-17 21:27 UTC (permalink / raw)
  To: meta

Noticed while working on (and extracted from) a larger refactoring
series which I'm still working on and which requires thorough testing.
Publishing 1/2 now just makes 2/2 smoother in this small series.

Eric Wong (2):
  v2writable: simplify epoch directory generation
  import: fix space calculation when reusing epochs

 lib/PublicInbox/InboxWritable.pm |  7 -------
 lib/PublicInbox/LeiStore.pm      |  5 ++---
 lib/PublicInbox/V2Writable.pm    | 33 +++++++++++++++-----------------
 3 files changed, 17 insertions(+), 28 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] v2writable: simplify epoch directory generation
  2024-12-17 21:27 [PATCH 0/2] v2: epoch reuse space calculation fix Eric Wong
@ 2024-12-17 21:27 ` Eric Wong
  2024-12-17 21:27 ` [PATCH 2/2] import: fix space calculation when reusing epochs Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2024-12-17 21:27 UTC (permalink / raw)
  To: meta

As noted in a now-removed comment, InboxWritable->git_dir_latest
seems redundant and unnecessary.  Instead, we can use
MultiGit->epoch_dir for these v2-only (non-extindex)
codepaths.
---
 lib/PublicInbox/InboxWritable.pm |  7 -------
 lib/PublicInbox/V2Writable.pm    | 22 ++++++++++------------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 8e95cb28..b995a8ad 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -172,13 +172,6 @@ sub cleanup ($) {
 	delete @{$_[0]}{qw(over mm git search)};
 }
 
-# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
-sub git_dir_latest {
-	my ($self, $max) = @_;
-	defined($$max = $self->max_git_epoch) ?
-		"$self->{inboxdir}/git/$$max.git" : undef;
-}
-
 # for unconfigured inboxes
 sub detect_indexlevel ($) {
 	my ($ibx) = @_;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 15945b35..194524b7 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -630,22 +630,20 @@ sub importer {
 			return $self->import_init($git, 0);
 		}
 	}
-	my $epoch = 0;
-	my $max;
-	my $latest = $self->{ibx}->git_dir_latest(\$max);
-	if (defined $latest) {
-		my $git = PublicInbox::Git->new($latest);
+	my $epoch = $self->{ibx}->max_git_epoch;
+	if (defined $epoch) { # use existing if not too big
+		my $git = PublicInbox::Git->new(
+			$self->{mg}->epoch_dir."/$epoch.git");
 		my $packed_bytes = $git->packed_bytes;
 		my $unpacked_bytes = $packed_bytes / $PACKING_FACTOR;
 
-		if ($unpacked_bytes >= $self->{rotate_bytes}) {
-			$epoch = $max + 1;
-		} else {
-			$self->{epoch_max} = $max;
+		if ($unpacked_bytes < $self->{rotate_bytes}) { # ok, space left
+			$self->{epoch_max} = $epoch;
 			return $self->import_init($git, $packed_bytes);
 		}
+		++$epoch; # too big, start a new epoch on fall through
 	}
-	$self->{epoch_max} = $epoch;
+	$self->{epoch_max} = $epoch //= 0;
 	my $dir = $self->{mg}->add_epoch($epoch);
 	$self->import_init(PublicInbox::Git->new($dir), 0);
 }
@@ -1211,8 +1209,8 @@ sub index_sync {
 	local $self->{need_checkpoint} = 0;
 	return xapian_only($self, $opt) if $opt->{xapian_only};
 
-	my $epoch_max;
-	my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return;
+	my $epoch_max = $self->{ibx}->max_git_epoch // return;
+	my $latest = $self->{mg}->epoch_dir."/$epoch_max.git";
 	if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison
 		use Time::HiRes qw(stat);
 		if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) {

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] import: fix space calculation when reusing epochs
  2024-12-17 21:27 [PATCH 0/2] v2: epoch reuse space calculation fix Eric Wong
  2024-12-17 21:27 ` [PATCH 1/2] v2writable: simplify epoch directory generation Eric Wong
@ 2024-12-17 21:27 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2024-12-17 21:27 UTC (permalink / raw)
  To: meta

Dividing the result of $git->packed_bytes by $PACKING_FACTOR
_twice_ was completely wrong for v2.  Just calculate
$unpacked_bytes once and use it for the Import->{bytes_added}
field.  The calculation for lei/store was actually correct,
just redundant since repeated division is unnecessary.
---
 lib/PublicInbox/LeiStore.pm   | 5 ++---
 lib/PublicInbox/V2Writable.pm | 9 ++++-----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 28eb5710..5b2c5587 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -94,15 +94,14 @@ sub importer {
 		$self->done; # unlock
 		# re-acquire lock, update alternates for new epoch
 		(undef, $tl) = eidx_init($self);
-		my $packed_bytes = $git->packed_bytes;
-		my $unpacked_bytes = $packed_bytes / $self->packing_factor;
+		my $unpacked_bytes = int($git->packed_bytes / $self->packing_factor);
 		if ($unpacked_bytes >= $self->rotate_bytes) {
 			$max++;
 			next;
 		}
 		my ($n, $e) = git_ident($git);
 		$self->{im} = $im = PublicInbox::Import->new($git, $n, $e);
-		$im->{bytes_added} = int($packed_bytes / $self->packing_factor);
+		$im->{bytes_added} = $unpacked_bytes;
 		$im->{lock_path} = undef;
 		$im->{path_type} = 'v2';
 		return $im;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 194524b7..61c41b60 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -634,12 +634,11 @@ sub importer {
 	if (defined $epoch) { # use existing if not too big
 		my $git = PublicInbox::Git->new(
 			$self->{mg}->epoch_dir."/$epoch.git");
-		my $packed_bytes = $git->packed_bytes;
-		my $unpacked_bytes = $packed_bytes / $PACKING_FACTOR;
+		my $unpacked_bytes = int($git->packed_bytes / $PACKING_FACTOR);
 
 		if ($unpacked_bytes < $self->{rotate_bytes}) { # ok, space left
 			$self->{epoch_max} = $epoch;
-			return $self->import_init($git, $packed_bytes);
+			return $self->import_init($git, $unpacked_bytes);
 		}
 		++$epoch; # too big, start a new epoch on fall through
 	}
@@ -649,9 +648,9 @@ sub importer {
 }
 
 sub import_init {
-	my ($self, $git, $packed_bytes, $tmp) = @_;
+	my ($self, $git, $unpacked_bytes, $tmp) = @_;
 	my $im = PublicInbox::Import->new($git, undef, undef, $self->{ibx});
-	$im->{bytes_added} = int($packed_bytes / $PACKING_FACTOR);
+	$im->{bytes_added} = $unpacked_bytes;
 	$im->{lock_path} = undef;
 	$im->{path_type} = 'v2';
 	$self->{im} = $im unless $tmp;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-12-17 21:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2024-12-17 21:27 [PATCH 0/2] v2: epoch reuse space calculation fix Eric Wong
2024-12-17 21:27 ` [PATCH 1/2] v2writable: simplify epoch directory generation Eric Wong
2024-12-17 21:27 ` [PATCH 2/2] import: fix space calculation when reusing epochs Eric Wong

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).