unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/6] cindex fixes, and some spring cleaning
@ 2023-04-07 12:40 Eric Wong
  2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

cindex now preserves indexlevel like older mail-based indices

We'll reuse umask code across inboxes, extindex, and now cindex
for code.  This also lets us simplify some of our old code by
eliminating some useless functions.

The vstring stuff should make the code more readable to new
contributors.

Eric Wong (6):
  cindex: improve progress display
  cindex: preserve indexlevel across invocations
  umask: hoist out of InboxWritable
  umask: rely on the OnDestroy-based call where applicable
  searchidx: use vstring to improve readability
  switch git version comparisons to vstrings, too

 MANIFEST                         |  1 +
 lib/PublicInbox/CodeSearchIdx.pm | 49 ++++++++++++----------
 lib/PublicInbox/ExtSearchIdx.pm  | 17 +++-----
 lib/PublicInbox/Git.pm           | 11 +++--
 lib/PublicInbox/InboxWritable.pm | 70 +-------------------------------
 lib/PublicInbox/LeiMirror.pm     |  2 +-
 lib/PublicInbox/SearchIdx.pm     | 37 ++++++++---------
 lib/PublicInbox/TestCommon.pm    | 16 +++-----
 lib/PublicInbox/Umask.pm         | 70 ++++++++++++++++++++++++++++++++
 lib/PublicInbox/V2Writable.pm    | 23 ++++-------
 lib/PublicInbox/Xapcmd.pm        | 35 ++++++++--------
 script/public-inbox-convert      |  9 ++--
 t/cindex.t                       | 45 ++++++++++++++++++++
 t/search.t                       |  7 ++--
 14 files changed, 212 insertions(+), 180 deletions(-)
 create mode 100644 lib/PublicInbox/Umask.pm


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] cindex: improve progress display
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  2023-04-07 12:40 ` [PATCH 2/6] cindex: preserve indexlevel across invocations Eric Wong
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

Instead of displaying the total number of changes across all
repos next to the repo path ("$GIT_DIR: $TOTAL commits"), we'll
only show the number of changes made in that repo.

We'll also note when a prune is complete on a shard, since
prunes may often be expensive no-ops.
---
 lib/PublicInbox/CodeSearchIdx.pm | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index f3d07f25..5f20325a 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -437,6 +437,7 @@ sub partition_refs ($$$) {
 		$fh;
 	} @RDONLY_XDB;
 
+	my $n0 = $NCHANGE;
 	while (defined(my $cmt = <$rfh>)) {
 		chomp $cmt;
 		my $n = hex(substr($cmt, 0, 8)) % scalar(@RDONLY_XDB);
@@ -458,7 +459,8 @@ sub partition_refs ($$$) {
 	close($rfh);
 	return () if $DO_QUIT;
 	if (!$? || (($? & 127) == POSIX::SIGPIPE && $seen > $SEEN_MAX)) {
-		progress($self, "$git->{git_dir}: $NCHANGE commits");
+		my $n = $NCHANGE - $n0;
+		progress($self, "$git->{git_dir}: $n commits") if $n;
 		for my $fh (@shard_in) {
 			$fh->flush or die "flush: $!";
 			sysseek($fh, 0, SEEK_SET) or die "seek: $!";
@@ -697,6 +699,8 @@ sub event_step { # may be requeued via DS
 	cidx_ckpoint($self);
 	return PublicInbox::DS::requeue($self) if $PRUNE_CUR <= $PRUNE_MAX;
 	send($PRUNE_OP_P, "prune_done $self->{shard}", MSG_EOR);
+	$PRUNE_NR //= 0;
+	progress($self, "prune [$self->{shard}] $PRUNE_NR done");
 	$TMP_GIT->cleanup;
 	$TMP_GIT = $PRUNE_OP_P = $PRUNE_CUR = $PRUNE_MAX = undef;
 	%ACTIVE_GIT_DIR = ();

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] cindex: preserve indexlevel across invocations
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
  2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  2023-04-07 12:40 ` [PATCH 3/6] umask: hoist out of InboxWritable Eric Wong
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

This matches the behavior of mail indexers and ensures `medium'
indices don't grow unexpectedly to be come `full' indices.
---
 lib/PublicInbox/CodeSearchIdx.pm | 15 +++++++++--
 lib/PublicInbox/SearchIdx.pm     |  2 +-
 t/cindex.t                       | 45 ++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 5f20325a..3a3fc03e 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -85,7 +85,6 @@ sub new {
 		xpfx => "$dir/cidx".  PublicInbox::CodeSearch::CIDX_SCHEMA_VER,
 		cidx_dir => $dir,
 		creat => 1, # TODO: get rid of this, should be implicit
-		indexlevel => $l,
 		transact_bytes => 0, # for checkpoint
 		total_bytes => 0, # for lock_release
 		current_info => '',
@@ -617,16 +616,28 @@ sub cidx_init ($) {
 	}
 	$self->lock_acquire;
 	my @shards;
+	my $l = $self->{indexlevel} //= $self->{-opt}->{indexlevel};
+
 	for my $n (0..($self->{nshard} - 1)) {
 		my $shard = bless { %$self, shard => $n }, ref($self);
 		delete @$shard{qw(lockfh lock_path)};
-		$shard->idx_acquire;
+		my $xdb = $shard->idx_acquire;
+		if (!$n) {
+			if (($l // '') eq 'medium') {
+				$xdb->set_metadata('indexlevel', $l);
+			} elsif (($l // '') eq 'full') {
+				$xdb->set_metadata('indexlevel', ''); # unset
+			}
+			$l ||= $xdb->get_metadata('indexlevel') || 'full';
+		}
+		$shard->{indexlevel} = $l;
 		$shard->idx_release;
 		$shard->wq_workers_start("cidx shard[$n]", 1, $SIGSET, {
 			siblings => \@shards, # for ipc_atfork_child
 		}, \&shard_done_wait, $self);
 		push @shards, $shard;
 	}
+	$self->{indexlevel} //= $l;
 	# this warning needs to happen after idx_acquire
 	state $once;
 	warn <<EOM if $PublicInbox::Search::X{CLOEXEC_UNSET} && !$once++;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f36c8f97..699af432 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -90,7 +90,7 @@ sub new {
 	$self;
 }
 
-sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+sub need_xapian ($) { ($_[0]->{indexlevel} // 'full') =~ $xapianlevels }
 
 sub idx_release {
 	my ($self, $wake) = @_;
diff --git a/t/cindex.t b/t/cindex.t
index 9da0ba69..d40f73ff 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -4,11 +4,13 @@
 use v5.12;
 use PublicInbox::TestCommon;
 use Cwd qw(getcwd abs_path);
+use List::Util qw(sum);
 require_mods(qw(json Search::Xapian));
 use_ok 'PublicInbox::CodeSearchIdx';
 require PublicInbox::Import;
 my ($tmp, $for_destroy) = tmpdir();
 my $pwd = getcwd();
+my @unused_keys = qw(last_commit has_threadid skip_docdata);
 
 # I reworked CodeSearchIdx->shard_worker to handle empty trees
 # in the initial commit generated by cvs2svn for xapian.git
@@ -71,7 +73,48 @@ ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp, "$tmp/wt0"]),
 ok(-e "$tmp/ext/cidx.lock", 'external dir created');
 ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
 
+ok(run_script([qw(-cindex -L medium --dangerous -q -d),
+	"$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
+
+my $no_metadata_set = sub {
+	my ($i, $extra, $xdb) = @_;
+	for my $xdb (@$xdb) {
+		for my $k (@unused_keys, @$extra) {
+			is($xdb->get_metadata($k) // '', '',
+				"metadata $k unset in shard #$i");
+		}
+		++$i;
+	}
+};
+
+{
+	my $mid_size = sum(map { -s $_ } glob("$tmp/med/cidx*/*/*"));
+	my $full_size = sum(map { -s $_ } glob("$tmp/ext/cidx*/*/*"));
+	ok($full_size > $mid_size, 'full size > mid size') or
+		diag "full=$full_size mid=$mid_size";
+	for my $l (qw(med ext)) {
+		ok(run_script([qw(-cindex -q --reindex -u -d), "$tmp/$l"]),
+			"reindex $l");
+	}
+	$mid_size = sum(map { -s $_ } glob("$tmp/med/cidx*/*/*"));
+	$full_size = sum(map { -s $_ } glob("$tmp/ext/cidx*/*/*"));
+	ok($full_size > $mid_size, 'full size > mid size after reindex') or
+		diag "full=$full_size mid=$mid_size";
+	my $csrch = PublicInbox::CodeSearch->new("$tmp/med");
+	my ($xdb0, @xdb) = $csrch->xdb_shards_flat;
+	$no_metadata_set->(0, [], [ $xdb0 ]);
+	is($xdb0->get_metadata('indexlevel'), 'medium',
+		'indexlevel set in shard #0');
+	$no_metadata_set->(1, ['indexlevel'], \@xdb);
+
+	ok(run_script([qw(-cindex -q -L full --reindex -u -d), "$tmp/med"]),
+		'reindex medium as full');
+	@xdb = $csrch->xdb_shards_flat;
+	$no_metadata_set->(0, ['indexlevel'], \@xdb);
+}
+
 use_ok 'PublicInbox::CodeSearch';
+
 if ('multi-repo search') {
 	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
 	my $mset = $csrch->mset('NUL');
@@ -86,6 +129,8 @@ if ('multi-repo search') {
 	$mset = $csrch->mset('NUL', { git_dir => abs_path("$zp/.git") });
 	@have = sort(map { $_->get_document->get_data } $mset->items);
 	is_xdeeply(\@have, $exp, 'got expected subjects w/ GIT_DIR filter');
+	my @xdb = $csrch->xdb_shards_flat;
+	$no_metadata_set->(0, ['indexlevel'], \@xdb);
 }
 
 if ('--update') {

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] umask: hoist out of InboxWritable
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
  2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
  2023-04-07 12:40 ` [PATCH 2/6] cindex: preserve indexlevel across invocations Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  2023-04-07 12:40 ` [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable Eric Wong
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

Since CodeSearchIdx doesn't deal with inboxes, it makes sense
to split it out from inbox-specific code and start moving
towards using OnDestroy to restore the umask at the end of
scope and reducing extra functions.
---
 MANIFEST                         |  1 +
 lib/PublicInbox/CodeSearchIdx.pm | 28 ++++---------
 lib/PublicInbox/ExtSearchIdx.pm  |  3 +-
 lib/PublicInbox/InboxWritable.pm | 70 +-------------------------------
 lib/PublicInbox/SearchIdx.pm     |  6 ++-
 lib/PublicInbox/Umask.pm         | 70 ++++++++++++++++++++++++++++++++
 t/search.t                       |  7 ++--
 7 files changed, 91 insertions(+), 94 deletions(-)
 create mode 100644 lib/PublicInbox/Umask.pm

diff --git a/MANIFEST b/MANIFEST
index a0e64c6a..c338f0f4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -340,6 +340,7 @@ lib/PublicInbox/TestCommon.pm
 lib/PublicInbox/Tmpfile.pm
 lib/PublicInbox/URIimap.pm
 lib/PublicInbox/URInntps.pm
+lib/PublicInbox/Umask.pm
 lib/PublicInbox/Unsubscribe.pm
 lib/PublicInbox/UserContent.pm
 lib/PublicInbox/V2Writable.pm
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 3a3fc03e..78032c00 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -770,28 +770,23 @@ sub init_tmp_git_dir ($) {
 
 sub prep_umask ($) {
 	my ($self) = @_;
-	my $um;
-	my $cur = umask;
 	if ($self->{-cidx_internal}) { # respect core.sharedRepository
 		@{$self->{git_dirs}} == 1 or die 'BUG: only for GIT_DIR';
-		# yuck, FIXME move umask handling out of inbox-specific stuff
-		require PublicInbox::InboxWritable;
-		my $git = PublicInbox::Git->new($self->{git_dirs}->[0]);
-		chomp($um = $git->qx('config', 'core.sharedRepository') // '');
-		$um = PublicInbox::InboxWritable::_git_config_perm(undef, $um);
-		$um = PublicInbox::InboxWritable::_umask_for($um);
-		umask == $um or progress($self, 'umask from git: ',
-						sprintf('0%03o', $um));
+		local $self->{git} =
+			PublicInbox::Git->new($self->{git_dirs}->[0]);
+		$self->with_umask;
 	} elsif (-d $self->{cidx_dir}) { # respect existing perms
 		my @st = stat(_);
-		$um = (~$st[2] & 0777);
+		my $um = (~$st[2] & 0777);
+		$self->{umask} = $um; # for SearchIdx->with_umask
 		umask == $um or progress($self, 'using umask from ',
 						$self->{cidx_dir}, ': ',
 						sprintf('0%03o', $um));
-	}
-	defined($um) ?
-		PublicInbox::OnDestroy->new(\&CORE::umask, umask($um)) :
+		PublicInbox::OnDestroy->new($$, \&CORE::umask, umask($um));
+	} else {
+		$self->{umask} = umask; # for SearchIdx->with_umask
 		undef;
+	}
 }
 
 sub start_prune ($) {
@@ -896,9 +891,4 @@ sub shard_done_wait { # awaitpid cb via ipc_worker_reap
 	PublicInbox::DS::enqueue_reap() if !shards_active(); # once more for PLC
 }
 
-sub with_umask { # TODO get rid of this treewide and rely on OnDestroy
-	my ($self, $cb, @arg) = @_;
-	$cb->(@arg);
-}
-
 1;
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 5445b156..6f3711ba 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -16,7 +16,7 @@
 package PublicInbox::ExtSearchIdx;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
+use parent qw(PublicInbox::ExtSearch PublicInbox::Umask PublicInbox::Lock);
 use Carp qw(croak carp);
 use Scalar::Util qw(blessed);
 use Sys::Hostname qw(hostname);
@@ -1397,7 +1397,6 @@ sub eidx_watch { # public-inbox-extindex --watch main loop
 
 no warnings 'once';
 *done = \&PublicInbox::V2Writable::done;
-*with_umask = \&PublicInbox::InboxWritable::with_umask;
 *parallel_init = \&PublicInbox::V2Writable::parallel_init;
 *nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
 *sync_prepare = \&PublicInbox::V2Writable::sync_prepare;
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 022e2a69..65952aa2 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -1,25 +1,17 @@
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Extends read-only Inbox for writing
 package PublicInbox::InboxWritable;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Inbox Exporter);
+use parent qw(PublicInbox::Inbox PublicInbox::Umask Exporter);
 use PublicInbox::Import;
 use PublicInbox::Filter::Base qw(REJECT);
 use Errno qw(ENOENT);
 our @EXPORT_OK = qw(eml_from_path);
 use Fcntl qw(O_RDONLY O_NONBLOCK);
 
-use constant {
-	PERM_UMASK => 0,
-	OLD_PERM_GROUP => 1,
-	OLD_PERM_EVERYBODY => 2,
-	PERM_GROUP => 0660,
-	PERM_EVERYBODY => 0664,
-};
-
 sub new {
 	my ($class, $ibx, $creat_opt) = @_;
 	return $ibx if ref($ibx) eq $class;
@@ -176,64 +168,6 @@ sub import_mbox {
 	$im->done;
 }
 
-sub _read_git_config_perm {
-	my ($self) = @_;
-	chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
-	$perm;
-}
-
-sub _git_config_perm {
-	my $self = shift;
-	my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
-	return PERM_UMASK if (!defined($perm) || $perm eq '');
-	return PERM_UMASK if ($perm eq 'umask');
-	return PERM_GROUP if ($perm eq 'group');
-	if ($perm =~ /\A(?:all|world|everybody)\z/) {
-		return PERM_EVERYBODY;
-	}
-	return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
-	return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
-
-	my $i = oct($perm);
-	return PERM_UMASK if ($i == PERM_UMASK);
-	return PERM_GROUP if ($i == OLD_PERM_GROUP);
-	return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);
-
-	if (($i & 0600) != 0600) {
-		die "core.sharedRepository mode invalid: ".
-		    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
-	}
-	($i & 0666);
-}
-
-sub _umask_for {
-	my ($perm) = @_; # _git_config_perm return value
-	my $rv = $perm;
-	return umask if $rv == 0;
-
-	# set +x bit if +r or +w were set
-	$rv |= 0100 if ($rv & 0600);
-	$rv |= 0010 if ($rv & 0060);
-	$rv |= 0001 if ($rv & 0006);
-	(~$rv & 0777);
-}
-
-sub with_umask {
-	my ($self, $cb, @arg) = @_;
-	my $old = umask($self->{umask} //= umask_prepare($self));
-	my $rv = eval { $cb->(@arg) };
-	my $err = $@;
-	umask $old;
-	die $err if $err;
-	$rv;
-}
-
-sub umask_prepare {
-	my ($self) = @_;
-	my $perm = _git_config_perm($self);
-	_umask_for($perm);
-}
-
 sub cleanup ($) {
 	delete @{$_[0]}{qw(over mm git search)};
 }
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 699af432..69ab30e6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -9,7 +9,8 @@
 package PublicInbox::SearchIdx;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::Search PublicInbox::Lock Exporter);
+use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask
+	Exporter);
 use PublicInbox::Eml;
 use PublicInbox::Search qw(xap_terms);
 use PublicInbox::InboxWritable;
@@ -821,7 +822,8 @@ sub unindex_both { # git->cat_async callback
 
 sub with_umask {
 	my $self = shift;
-	($self->{ibx} // $self->{eidx})->with_umask(@_);
+	my $owner = $self->{ibx} // $self->{eidx};
+	$owner ? $owner->with_umask(@_) : $self->SUPER::with_umask(@_)
 }
 
 # called by public-inbox-index
diff --git a/lib/PublicInbox/Umask.pm b/lib/PublicInbox/Umask.pm
new file mode 100644
index 00000000..00772ce5
--- /dev/null
+++ b/lib/PublicInbox/Umask.pm
@@ -0,0 +1,70 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# base class to ensures Xapian||SQLite files respect core.sharedRepository
+# of git repos
+package PublicInbox::Umask;
+use v5.12;
+use PublicInbox::OnDestroy;
+
+use constant {
+	PERM_UMASK => 0,
+	OLD_PERM_GROUP => 1,
+	OLD_PERM_EVERYBODY => 2,
+	PERM_GROUP => 0660,
+	PERM_EVERYBODY => 0664,
+};
+
+sub _read_git_config_perm {
+	my ($self) = @_;
+	chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
+	$perm;
+}
+
+sub _git_config_perm {
+	my $self = shift;
+	my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
+	$perm //= '';
+	return PERM_UMASK if $perm eq '' || $perm eq 'umask';
+	return PERM_GROUP if $perm eq 'group';
+	return PERM_EVERYBODY if $perm =~ /\A(?:all|world|everybody)\z/;
+	return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
+	return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
+
+	my $i = oct($perm);
+	return PERM_UMASK if $i == PERM_UMASK;
+	return PERM_GROUP if $i == OLD_PERM_GROUP;
+	return PERM_EVERYBODY if $i == OLD_PERM_EVERYBODY;
+
+	if (($i & 0600) != 0600) {
+		die "core.sharedRepository mode invalid: ".
+		    sprintf('%.3o', $i) . "\nOwner must have permissions\n";
+	}
+	($i & 0666);
+}
+
+sub _umask_for {
+	my ($perm) = @_; # _git_config_perm return value
+	my $rv = $perm;
+	return umask if $rv == 0;
+
+	# set +x bit if +r or +w were set
+	$rv |= 0100 if ($rv & 0600);
+	$rv |= 0010 if ($rv & 0060);
+	$rv |= 0001 if ($rv & 0006);
+	(~$rv & 0777);
+}
+
+sub with_umask {
+	my ($self, $cb, @arg) = @_;
+	my $old = umask($self->{umask} //= umask_prepare($self));
+	my $restore = PublicInbox::OnDestroy->new($$, \&CORE::umask, $old);
+	$cb ? $cb->(@arg) : $restore;
+}
+
+sub umask_prepare {
+	my ($self) = @_;
+	_umask_for(_git_config_perm($self));
+}
+
+1;
diff --git a/t/search.t b/t/search.t
index cf639a6d..ea5a2a5a 100644
--- a/t/search.t
+++ b/t/search.t
@@ -41,8 +41,9 @@ sub oct_is ($$$) {
 
 {
 	# git repository perms
+	use_ok 'PublicInbox::Umask';
 	oct_is($ibx->_git_config_perm(),
-		&PublicInbox::InboxWritable::PERM_GROUP,
+		PublicInbox::Umask::PERM_GROUP(),
 		'undefined permission is group');
 	my @t = (
 		[ '0644', 0022, '644 => umask(0022)' ],
@@ -54,8 +55,8 @@ sub oct_is ($$$) {
 	);
 	for (@t) {
 		my ($perm, $exp, $msg) = @$_;
-		my $got = PublicInbox::InboxWritable::_umask_for(
-			PublicInbox::InboxWritable->_git_config_perm($perm));
+		my $got = PublicInbox::Umask::_umask_for(
+			PublicInbox::Umask->_git_config_perm($perm));
 		oct_is($got, $exp, $msg);
 	}
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
                   ` (2 preceding siblings ...)
  2023-04-07 12:40 ` [PATCH 3/6] umask: hoist out of InboxWritable Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  2023-04-07 12:40 ` [PATCH 5/6] searchidx: use vstring to improve readability Eric Wong
  2023-04-07 12:40 ` [PATCH 6/6] switch git version comparisons to vstrings, too Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

This lets us get rid of some awkwardness around the old API
and single-use subroutines while saving us some LoC.
---
 lib/PublicInbox/ExtSearchIdx.pm | 14 +++++--------
 lib/PublicInbox/SearchIdx.pm    | 19 ++++++------------
 lib/PublicInbox/V2Writable.pm   | 23 +++++++++-------------
 lib/PublicInbox/Xapcmd.pm       | 35 +++++++++++++++------------------
 script/public-inbox-convert     |  9 ++++-----
 5 files changed, 40 insertions(+), 60 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 6f3711ba..6856ae66 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -1179,12 +1179,6 @@ sub update_last_commit { # overrides V2Writable
 	$self->{oidx}->eidx_meta($meta_key, $latest_cmt);
 }
 
-sub _idx_init { # with_umask callback
-	my ($self, $opt) = @_;
-	PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock
-	$self->{midx} = PublicInbox::MiscIdx->new($self);
-}
-
 sub symlink_packs ($$) {
 	my ($ibx, $pd) = @_;
 	my $ret = 0;
@@ -1270,15 +1264,17 @@ sub idx_init { # similar to V2Writable
 	}
 	($has_new || $prune_nr || $new ne '') and
 		$self->{mg}->write_alternates($mode, $alt, $new);
-	$git_midx and $self->with_umask(sub {
+	my $restore = $self->with_umask;
+	if ($git_midx) {
 		my @cmd = ('multi-pack-index');
 		push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1;
 		my $lk = $self->lock_for_scope;
 		system('git', "--git-dir=$ALL", @cmd, 'write');
 		# ignore errors, fairly new command, may not exist
-	});
+	}
 	$self->parallel_init($self->{indexlevel});
-	$self->with_umask(\&_idx_init, $self, $opt);
+	PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock
+	$self->{midx} = PublicInbox::MiscIdx->new($self);
 	$self->{oidx}->begin_lazy;
 	$self->{oidx}->eidx_prep;
 	$self->{midx}->create_xdb if $new ne '';
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 69ab30e6..511dd4d6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1110,8 +1110,10 @@ sub DESTROY {
 	$_[0]->{lockfh} = undef;
 }
 
-sub _begin_txn {
+sub begin_txn_lazy {
 	my ($self) = @_;
+	return if $self->{txn};
+	my $restore = $self->with_umask;
 	my $xdb = $self->{xdb} || idx_acquire($self);
 	$self->{oidx}->begin_lazy if $self->{oidx};
 	$xdb->begin_transaction if $xdb;
@@ -1119,11 +1121,6 @@ sub _begin_txn {
 	$xdb;
 }
 
-sub begin_txn_lazy {
-	my ($self) = @_;
-	$self->with_umask(\&_begin_txn, $self) if !$self->{txn};
-}
-
 # store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
 # This metadata is read by InboxWritable->detect_indexlevel:
 sub set_metadata_once {
@@ -1147,8 +1144,10 @@ sub set_metadata_once {
 	}
 }
 
-sub _commit_txn {
+sub commit_txn_lazy {
 	my ($self) = @_;
+	return unless delete($self->{txn});
+	my $restore = $self->with_umask;
 	if (my $eidx = $self->{eidx}) {
 		$eidx->git->async_wait_all;
 		$eidx->{transact_bytes} = 0;
@@ -1160,12 +1159,6 @@ sub _commit_txn {
 	$self->{oidx}->commit_lazy if $self->{oidx};
 }
 
-sub commit_txn_lazy {
-	my ($self) = @_;
-	delete($self->{txn}) and
-		$self->with_umask(\&_commit_txn, $self);
-}
-
 sub eidx_shard_new {
 	my ($class, $eidx, $shard) = @_;
 	my $self = bless {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index d3d13941..856442a9 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -89,13 +89,6 @@ sub init_inbox {
 	$self->done;
 }
 
-# returns undef on duplicate or spam
-# mimics Import::add and wraps it for v2
-sub add {
-	my ($self, $eml, $check_cb) = @_;
-	$self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
-}
-
 sub idx_shard ($$) {
 	my ($self, $num) = @_;
 	$self->{idx_shards}->[$num % scalar(@{$self->{idx_shards}})];
@@ -113,8 +106,11 @@ sub do_idx ($$$) {
 	$n >= $self->{batch_bytes};
 }
 
-sub _add {
+# returns undef on duplicate or spam
+# mimics Import::add and wraps it for v2
+sub add {
 	my ($self, $mime, $check_cb) = @_;
+	my $restore = $self->{ibx}->with_umask;
 
 	# spam check:
 	if ($check_cb) {
@@ -391,17 +387,16 @@ sub rewrite_internal ($$;$$$) {
 # (retval[2]) is not part of the stable API shared with Import->remove
 sub remove {
 	my ($self, $eml, $cmt_msg) = @_;
-	my $r = $self->{ibx}->with_umask(\&rewrite_internal,
-						$self, $eml, $cmt_msg);
+	my $restore = $self->{ibx}->with_umask;
+	my $r = rewrite_internal($self, $eml, $cmt_msg);
 	defined($r) && defined($r->[0]) ? @$r: undef;
 }
 
 sub _replace ($$;$$) {
 	my ($self, $old_eml, $new_eml, $sref) = @_;
-	my $arg = [ $self, $old_eml, undef, $new_eml, $sref ];
-	my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal,
-			$self, $old_eml, undef, $new_eml, $sref) or return;
-
+	my $restore = $self->{ibx}->with_umask;
+	my $rewritten = rewrite_internal($self, $old_eml, undef,
+					$new_eml, $sref) or return;
 	my $rewrites = $rewritten->{rewrites};
 	# ->done is called if there are rewrites since we gc+prune from git
 	$self->idx_init if @$rewrites;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 10685636..c87baa7b 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -256,24 +256,6 @@ sub prepare_run {
 
 sub check_compact () { runnable_or_die($XAPIAN_COMPACT) }
 
-sub _run { # with_umask callback
-	my ($ibx, $cb, $opt) = @_;
-	my $im = $ibx->can('importer') ? $ibx->importer(0) : undef;
-	($im // $ibx)->lock_acquire;
-	my ($tmp, $queue) = prepare_run($ibx, $opt);
-
-	# fine-grained locking if we prepare for reindex
-	if (!$opt->{-coarse_lock}) {
-		prepare_reindex($ibx, $opt);
-		($im // $ibx)->lock_release;
-	}
-
-	$ibx->cleanup if $ibx->can('cleanup');
-	process_queue($queue, $cb, $opt);
-	($im // $ibx)->lock_acquire if !$opt->{-coarse_lock};
-	commit_changes($ibx, $im, $tmp, $opt);
-}
-
 sub run {
 	my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact'
 	my $cb = \&$task;
@@ -296,7 +278,22 @@ sub run {
 
 	local @SIG{keys %SIG} = values %SIG;
 	setup_signals();
-	$ibx->with_umask(\&_run, $ibx, $cb, $opt);
+	my $restore = $ibx->with_umask;
+
+	my $im = $ibx->can('importer') ? $ibx->importer(0) : undef;
+	($im // $ibx)->lock_acquire;
+	my ($tmp, $queue) = prepare_run($ibx, $opt);
+
+	# fine-grained locking if we prepare for reindex
+	if (!$opt->{-coarse_lock}) {
+		prepare_reindex($ibx, $opt);
+		($im // $ibx)->lock_release;
+	}
+
+	$ibx->cleanup if $ibx->can('cleanup');
+	process_queue($queue, $cb, $opt);
+	($im // $ibx)->lock_acquire if !$opt->{-coarse_lock};
+	commit_changes($ibx, $im, $tmp, $opt);
 }
 
 sub cpdb_retryable ($$) {
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 750adca4..96931cbf 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -89,7 +89,8 @@ sub link_or_copy ($$) {
 	File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n";
 }
 
-$old->with_umask(sub {
+{
+	my $restore = $old->with_umask;
 	my $old_cfg = "$old->{inboxdir}/config";
 	local $ENV{GIT_CONFIG} = $old_cfg;
 	my $new_cfg = "$new->{inboxdir}/all.git/config";
@@ -110,12 +111,10 @@ $old->with_umask(sub {
 	my $desc = "$old->{inboxdir}/description";
 	link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc;
 	my $clone = "$old->{inboxdir}/cloneurl";
-	if (-e $clone) {
-		warn <<"";
+	warn <<"" if -e $clone;
 $clone may not be valid after migrating to v2, not copying
 
-	}
-});
+}
 my $state = '';
 my $head = $old->{ref_head} || 'HEAD';
 my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] searchidx: use vstring to improve readability
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
                   ` (3 preceding siblings ...)
  2023-04-07 12:40 ` [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  2023-04-07 12:40 ` [PATCH 6/6] switch git version comparisons to vstrings, too Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

Perl has native `vstring' encoding for vector (or version)
strings, make use of it instead of relying on difficult-to-read
hex versions and integer shifts.
---
 lib/PublicInbox/SearchIdx.pm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 511dd4d6..496cea05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -114,15 +114,15 @@ sub load_xapian_writable () {
 	*sortable_serialise = $xap.'::sortable_serialise';
 	$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
 	$DB_OPEN = eval($xap.'::DB_OPEN()');
-	my $ver = (eval($xap.'::major_version()') << 16) |
-		(eval($xap.'::minor_version()') << 8) |
-		eval($xap.'::revision()');
-	if ($ver >= 0x10400) {
+	my $ver = eval 'v'.join('.', eval($xap.'::major_version()'),
+				eval($xap.'::minor_version()'),
+				eval($xap.'::revision()'));
+	if ($ver ge 1.4) { # new flags in Xapian 1.4
 		$DB_NO_SYNC = 0x4;
 		$DB_DANGEROUS = 0x10;
 	}
 	# Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
-	$X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218;
+	$X->{CLOEXEC_UNSET} = 1 if $ver ge v1.2.21 && $ver le v1.2.24;
 	1;
 }
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] switch git version comparisons to vstrings, too
  2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
                   ` (4 preceding siblings ...)
  2023-04-07 12:40 ` [PATCH 5/6] searchidx: use vstring to improve readability Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
  To: meta

There's too many require_git callsites in t/*.t to change,
but we can make the rest of the code more readable and reuse
PublicInbox::Git::version() in our test suite, too.
---
 lib/PublicInbox/Git.pm        | 11 +++++------
 lib/PublicInbox/LeiMirror.pm  |  2 +-
 lib/PublicInbox/TestCommon.pm | 16 ++++++----------
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 6f8232cf..e35d5277 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -33,7 +33,7 @@ our $async_warn; # true in read-only daemons
 # 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1
 use constant {
 	MAX_INFLIGHT => 512 * 3 / (65 + length('contents ')),
-	BATCH_CMD_VER => (2 << 24 | 36 << 16), # git 2.36+
+	BATCH_CMD_VER => v2.36.0, # git 2.36+
 };
 
 my %GIT_ESC = (
@@ -67,8 +67,7 @@ sub check_git_exe () {
 		close($rd) or die "$GIT_EXE --version: $?";
 		$v =~ /\b([0-9]+(?:\.[0-9]+){2})/ or die
 			"$GIT_EXE --version output: $v # unparseable";
-		my @v = split(/\./, $1, 3);
-		$GIT_VER = ($v[0] << 24) | ($v[1] << 16) | $v[2];
+		$GIT_VER = eval("v$1") // die "BUG: bad vstring: $1 ($v)";
 		$EXE_ST = $st;
 	}
 }
@@ -159,7 +158,7 @@ sub _bidi_pipe {
 
 	# git 2.31.0+ supports -c core.abbrev=no, don't bother with
 	# core.abbrev=64 since not many releases had SHA-256 prior to 2.31
-	my $abbr = $GIT_VER < (2 << 24 | 31 << 16) ? 40 : 'no';
+	my $abbr = $GIT_VER lt v2.31.0 ? 40 : 'no';
 	my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
 			'cat-file', "--$batch");
 	if ($err) {
@@ -316,7 +315,7 @@ sub cat_async_wait ($) {
 sub batch_prepare ($) {
 	my ($self) = @_;
 	check_git_exe();
-	if ($GIT_VER >= BATCH_CMD_VER) {
+	if ($GIT_VER ge BATCH_CMD_VER) {
 		_bidi_pipe($self, qw(batch-command in out pid err_c));
 		$self->{-bc} = 1;
 	} else {
@@ -370,7 +369,7 @@ sub check_async_begin ($) {
 	die 'BUG: already in async check' if $self->{inflight_c};
 	cleanup($self) if alternates_changed($self);
 	check_git_exe();
-	if ($GIT_VER >= BATCH_CMD_VER) {
+	if ($GIT_VER ge BATCH_CMD_VER) {
 		_bidi_pipe($self, qw(batch-command in out pid err_c));
 		$self->{-bc} = 1;
 		$self->{inflight} = [];
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index e0709fbd..8f749688 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -276,7 +276,7 @@ sub fetch_args ($$) {
 			($lei->{opt}->{jobs} // 1) > 1;
 	push @cmd, '-v' if $lei->{opt}->{verbose};
 	push(@cmd, '-p') if $lei->{opt}->{prune};
-	PublicInbox::Git::version() >= ((2 << 24) | (29 << 16)) and
+	PublicInbox::Git::version() ge v2.29.0 and
 		push(@cmd, '--no-write-fetch-head');
 	@cmd;
 }
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 494323c0..aa2abc43 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -10,6 +10,7 @@ use Fcntl qw(F_SETFD :seek);
 use POSIX qw(dup2);
 use IO::Socket::INET;
 use File::Spec;
+use Scalar::Util qw(isvstring);
 our @EXPORT;
 my $lei_loud = $ENV{TEST_LEI_ERR_LOUD};
 my $tail_cmd = $ENV{TAIL};
@@ -109,17 +110,12 @@ sub have_xapian_compact (;$) {
 
 sub require_git ($;$) {
 	my ($req, $nr) = @_;
-	state ($cur_int, $cur_ver);
-	$cur_int //= do {
-		chomp($cur_ver = xqx([qw(git --version)]));
-		my @v = ($cur_ver =~ /version (\d+)\.(\d+)(?:\.(\d+))?/);
-		($v[0] << 24) | ($v[1] << 16) | ($v[2] // 0);
-	};
-
-	my ($req_maj, $req_min, $req_sub) = split(/\./, $req);
-	my $req_int = ($req_maj << 24) | ($req_min << 16) | ($req_sub // 0);
+	require PublicInbox::Git;
+	state $cur_vstr = PublicInbox::Git::version();
+	$req = eval("v$req") unless isvstring($req);
 
-	return 1 if $cur_int >= $req_int;
+	return 1 if $cur_vstr ge $req;
+	state $cur_ver = sprintf('%vd', $cur_vstr);
 	return plan skip_all => "git $req+ required, have $cur_ver" if !$nr;
 	defined(wantarray) ? undef :
 		skip("git $req+ required (have $cur_ver), skipping $nr tests")

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-04-07 12:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
2023-04-07 12:40 ` [PATCH 2/6] cindex: preserve indexlevel across invocations Eric Wong
2023-04-07 12:40 ` [PATCH 3/6] umask: hoist out of InboxWritable Eric Wong
2023-04-07 12:40 ` [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable Eric Wong
2023-04-07 12:40 ` [PATCH 5/6] searchidx: use vstring to improve readability Eric Wong
2023-04-07 12:40 ` [PATCH 6/6] switch git version comparisons to vstrings, too Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).