* [PATCH 1/6] cindex: improve progress display
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
2023-04-07 12:40 ` [PATCH 2/6] cindex: preserve indexlevel across invocations Eric Wong
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
Instead of displaying the total number of changes across all
repos next to the repo path ("$GIT_DIR: $TOTAL commits"), we'll
only show the number of changes made in that repo.
We'll also note when a prune is complete on a shard, since
prunes may often be expensive no-ops.
---
lib/PublicInbox/CodeSearchIdx.pm | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index f3d07f25..5f20325a 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -437,6 +437,7 @@ sub partition_refs ($$$) {
$fh;
} @RDONLY_XDB;
+ my $n0 = $NCHANGE;
while (defined(my $cmt = <$rfh>)) {
chomp $cmt;
my $n = hex(substr($cmt, 0, 8)) % scalar(@RDONLY_XDB);
@@ -458,7 +459,8 @@ sub partition_refs ($$$) {
close($rfh);
return () if $DO_QUIT;
if (!$? || (($? & 127) == POSIX::SIGPIPE && $seen > $SEEN_MAX)) {
- progress($self, "$git->{git_dir}: $NCHANGE commits");
+ my $n = $NCHANGE - $n0;
+ progress($self, "$git->{git_dir}: $n commits") if $n;
for my $fh (@shard_in) {
$fh->flush or die "flush: $!";
sysseek($fh, 0, SEEK_SET) or die "seek: $!";
@@ -697,6 +699,8 @@ sub event_step { # may be requeued via DS
cidx_ckpoint($self);
return PublicInbox::DS::requeue($self) if $PRUNE_CUR <= $PRUNE_MAX;
send($PRUNE_OP_P, "prune_done $self->{shard}", MSG_EOR);
+ $PRUNE_NR //= 0;
+ progress($self, "prune [$self->{shard}] $PRUNE_NR done");
$TMP_GIT->cleanup;
$TMP_GIT = $PRUNE_OP_P = $PRUNE_CUR = $PRUNE_MAX = undef;
%ACTIVE_GIT_DIR = ();
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/6] cindex: preserve indexlevel across invocations
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
2023-04-07 12:40 ` [PATCH 3/6] umask: hoist out of InboxWritable Eric Wong
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
This matches the behavior of mail indexers and ensures `medium'
indices don't grow unexpectedly to be come `full' indices.
---
lib/PublicInbox/CodeSearchIdx.pm | 15 +++++++++--
lib/PublicInbox/SearchIdx.pm | 2 +-
t/cindex.t | 45 ++++++++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 5f20325a..3a3fc03e 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -85,7 +85,6 @@ sub new {
xpfx => "$dir/cidx". PublicInbox::CodeSearch::CIDX_SCHEMA_VER,
cidx_dir => $dir,
creat => 1, # TODO: get rid of this, should be implicit
- indexlevel => $l,
transact_bytes => 0, # for checkpoint
total_bytes => 0, # for lock_release
current_info => '',
@@ -617,16 +616,28 @@ sub cidx_init ($) {
}
$self->lock_acquire;
my @shards;
+ my $l = $self->{indexlevel} //= $self->{-opt}->{indexlevel};
+
for my $n (0..($self->{nshard} - 1)) {
my $shard = bless { %$self, shard => $n }, ref($self);
delete @$shard{qw(lockfh lock_path)};
- $shard->idx_acquire;
+ my $xdb = $shard->idx_acquire;
+ if (!$n) {
+ if (($l // '') eq 'medium') {
+ $xdb->set_metadata('indexlevel', $l);
+ } elsif (($l // '') eq 'full') {
+ $xdb->set_metadata('indexlevel', ''); # unset
+ }
+ $l ||= $xdb->get_metadata('indexlevel') || 'full';
+ }
+ $shard->{indexlevel} = $l;
$shard->idx_release;
$shard->wq_workers_start("cidx shard[$n]", 1, $SIGSET, {
siblings => \@shards, # for ipc_atfork_child
}, \&shard_done_wait, $self);
push @shards, $shard;
}
+ $self->{indexlevel} //= $l;
# this warning needs to happen after idx_acquire
state $once;
warn <<EOM if $PublicInbox::Search::X{CLOEXEC_UNSET} && !$once++;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f36c8f97..699af432 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -90,7 +90,7 @@ sub new {
$self;
}
-sub need_xapian ($) { $_[0]->{indexlevel} =~ $xapianlevels }
+sub need_xapian ($) { ($_[0]->{indexlevel} // 'full') =~ $xapianlevels }
sub idx_release {
my ($self, $wake) = @_;
diff --git a/t/cindex.t b/t/cindex.t
index 9da0ba69..d40f73ff 100644
--- a/t/cindex.t
+++ b/t/cindex.t
@@ -4,11 +4,13 @@
use v5.12;
use PublicInbox::TestCommon;
use Cwd qw(getcwd abs_path);
+use List::Util qw(sum);
require_mods(qw(json Search::Xapian));
use_ok 'PublicInbox::CodeSearchIdx';
require PublicInbox::Import;
my ($tmp, $for_destroy) = tmpdir();
my $pwd = getcwd();
+my @unused_keys = qw(last_commit has_threadid skip_docdata);
# I reworked CodeSearchIdx->shard_worker to handle empty trees
# in the initial commit generated by cvs2svn for xapian.git
@@ -71,7 +73,48 @@ ok(run_script([qw(-cindex --dangerous -q -d), "$tmp/ext", $zp, "$tmp/wt0"]),
ok(-e "$tmp/ext/cidx.lock", 'external dir created');
ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');
+ok(run_script([qw(-cindex -L medium --dangerous -q -d),
+ "$tmp/med", $zp, "$tmp/wt0"]), 'cindex external medium');
+
+my $no_metadata_set = sub {
+ my ($i, $extra, $xdb) = @_;
+ for my $xdb (@$xdb) {
+ for my $k (@unused_keys, @$extra) {
+ is($xdb->get_metadata($k) // '', '',
+ "metadata $k unset in shard #$i");
+ }
+ ++$i;
+ }
+};
+
+{
+ my $mid_size = sum(map { -s $_ } glob("$tmp/med/cidx*/*/*"));
+ my $full_size = sum(map { -s $_ } glob("$tmp/ext/cidx*/*/*"));
+ ok($full_size > $mid_size, 'full size > mid size') or
+ diag "full=$full_size mid=$mid_size";
+ for my $l (qw(med ext)) {
+ ok(run_script([qw(-cindex -q --reindex -u -d), "$tmp/$l"]),
+ "reindex $l");
+ }
+ $mid_size = sum(map { -s $_ } glob("$tmp/med/cidx*/*/*"));
+ $full_size = sum(map { -s $_ } glob("$tmp/ext/cidx*/*/*"));
+ ok($full_size > $mid_size, 'full size > mid size after reindex') or
+ diag "full=$full_size mid=$mid_size";
+ my $csrch = PublicInbox::CodeSearch->new("$tmp/med");
+ my ($xdb0, @xdb) = $csrch->xdb_shards_flat;
+ $no_metadata_set->(0, [], [ $xdb0 ]);
+ is($xdb0->get_metadata('indexlevel'), 'medium',
+ 'indexlevel set in shard #0');
+ $no_metadata_set->(1, ['indexlevel'], \@xdb);
+
+ ok(run_script([qw(-cindex -q -L full --reindex -u -d), "$tmp/med"]),
+ 'reindex medium as full');
+ @xdb = $csrch->xdb_shards_flat;
+ $no_metadata_set->(0, ['indexlevel'], \@xdb);
+}
+
use_ok 'PublicInbox::CodeSearch';
+
if ('multi-repo search') {
my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
my $mset = $csrch->mset('NUL');
@@ -86,6 +129,8 @@ if ('multi-repo search') {
$mset = $csrch->mset('NUL', { git_dir => abs_path("$zp/.git") });
@have = sort(map { $_->get_document->get_data } $mset->items);
is_xdeeply(\@have, $exp, 'got expected subjects w/ GIT_DIR filter');
+ my @xdb = $csrch->xdb_shards_flat;
+ $no_metadata_set->(0, ['indexlevel'], \@xdb);
}
if ('--update') {
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/6] umask: hoist out of InboxWritable
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
2023-04-07 12:40 ` [PATCH 1/6] cindex: improve progress display Eric Wong
2023-04-07 12:40 ` [PATCH 2/6] cindex: preserve indexlevel across invocations Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
2023-04-07 12:40 ` [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable Eric Wong
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
Since CodeSearchIdx doesn't deal with inboxes, it makes sense
to split it out from inbox-specific code and start moving
towards using OnDestroy to restore the umask at the end of
scope and reducing extra functions.
---
MANIFEST | 1 +
lib/PublicInbox/CodeSearchIdx.pm | 28 ++++---------
lib/PublicInbox/ExtSearchIdx.pm | 3 +-
lib/PublicInbox/InboxWritable.pm | 70 +-------------------------------
lib/PublicInbox/SearchIdx.pm | 6 ++-
lib/PublicInbox/Umask.pm | 70 ++++++++++++++++++++++++++++++++
t/search.t | 7 ++--
7 files changed, 91 insertions(+), 94 deletions(-)
create mode 100644 lib/PublicInbox/Umask.pm
diff --git a/MANIFEST b/MANIFEST
index a0e64c6a..c338f0f4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -340,6 +340,7 @@ lib/PublicInbox/TestCommon.pm
lib/PublicInbox/Tmpfile.pm
lib/PublicInbox/URIimap.pm
lib/PublicInbox/URInntps.pm
+lib/PublicInbox/Umask.pm
lib/PublicInbox/Unsubscribe.pm
lib/PublicInbox/UserContent.pm
lib/PublicInbox/V2Writable.pm
diff --git a/lib/PublicInbox/CodeSearchIdx.pm b/lib/PublicInbox/CodeSearchIdx.pm
index 3a3fc03e..78032c00 100644
--- a/lib/PublicInbox/CodeSearchIdx.pm
+++ b/lib/PublicInbox/CodeSearchIdx.pm
@@ -770,28 +770,23 @@ sub init_tmp_git_dir ($) {
sub prep_umask ($) {
my ($self) = @_;
- my $um;
- my $cur = umask;
if ($self->{-cidx_internal}) { # respect core.sharedRepository
@{$self->{git_dirs}} == 1 or die 'BUG: only for GIT_DIR';
- # yuck, FIXME move umask handling out of inbox-specific stuff
- require PublicInbox::InboxWritable;
- my $git = PublicInbox::Git->new($self->{git_dirs}->[0]);
- chomp($um = $git->qx('config', 'core.sharedRepository') // '');
- $um = PublicInbox::InboxWritable::_git_config_perm(undef, $um);
- $um = PublicInbox::InboxWritable::_umask_for($um);
- umask == $um or progress($self, 'umask from git: ',
- sprintf('0%03o', $um));
+ local $self->{git} =
+ PublicInbox::Git->new($self->{git_dirs}->[0]);
+ $self->with_umask;
} elsif (-d $self->{cidx_dir}) { # respect existing perms
my @st = stat(_);
- $um = (~$st[2] & 0777);
+ my $um = (~$st[2] & 0777);
+ $self->{umask} = $um; # for SearchIdx->with_umask
umask == $um or progress($self, 'using umask from ',
$self->{cidx_dir}, ': ',
sprintf('0%03o', $um));
- }
- defined($um) ?
- PublicInbox::OnDestroy->new(\&CORE::umask, umask($um)) :
+ PublicInbox::OnDestroy->new($$, \&CORE::umask, umask($um));
+ } else {
+ $self->{umask} = umask; # for SearchIdx->with_umask
undef;
+ }
}
sub start_prune ($) {
@@ -896,9 +891,4 @@ sub shard_done_wait { # awaitpid cb via ipc_worker_reap
PublicInbox::DS::enqueue_reap() if !shards_active(); # once more for PLC
}
-sub with_umask { # TODO get rid of this treewide and rely on OnDestroy
- my ($self, $cb, @arg) = @_;
- $cb->(@arg);
-}
-
1;
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 5445b156..6f3711ba 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -16,7 +16,7 @@
package PublicInbox::ExtSearchIdx;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::ExtSearch PublicInbox::Lock);
+use parent qw(PublicInbox::ExtSearch PublicInbox::Umask PublicInbox::Lock);
use Carp qw(croak carp);
use Scalar::Util qw(blessed);
use Sys::Hostname qw(hostname);
@@ -1397,7 +1397,6 @@ sub eidx_watch { # public-inbox-extindex --watch main loop
no warnings 'once';
*done = \&PublicInbox::V2Writable::done;
-*with_umask = \&PublicInbox::InboxWritable::with_umask;
*parallel_init = \&PublicInbox::V2Writable::parallel_init;
*nproc_shards = \&PublicInbox::V2Writable::nproc_shards;
*sync_prepare = \&PublicInbox::V2Writable::sync_prepare;
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 022e2a69..65952aa2 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -1,25 +1,17 @@
-# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Extends read-only Inbox for writing
package PublicInbox::InboxWritable;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::Inbox Exporter);
+use parent qw(PublicInbox::Inbox PublicInbox::Umask Exporter);
use PublicInbox::Import;
use PublicInbox::Filter::Base qw(REJECT);
use Errno qw(ENOENT);
our @EXPORT_OK = qw(eml_from_path);
use Fcntl qw(O_RDONLY O_NONBLOCK);
-use constant {
- PERM_UMASK => 0,
- OLD_PERM_GROUP => 1,
- OLD_PERM_EVERYBODY => 2,
- PERM_GROUP => 0660,
- PERM_EVERYBODY => 0664,
-};
-
sub new {
my ($class, $ibx, $creat_opt) = @_;
return $ibx if ref($ibx) eq $class;
@@ -176,64 +168,6 @@ sub import_mbox {
$im->done;
}
-sub _read_git_config_perm {
- my ($self) = @_;
- chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
- $perm;
-}
-
-sub _git_config_perm {
- my $self = shift;
- my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
- return PERM_UMASK if (!defined($perm) || $perm eq '');
- return PERM_UMASK if ($perm eq 'umask');
- return PERM_GROUP if ($perm eq 'group');
- if ($perm =~ /\A(?:all|world|everybody)\z/) {
- return PERM_EVERYBODY;
- }
- return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
- return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
-
- my $i = oct($perm);
- return PERM_UMASK if ($i == PERM_UMASK);
- return PERM_GROUP if ($i == OLD_PERM_GROUP);
- return PERM_EVERYBODY if ($i == OLD_PERM_EVERYBODY);
-
- if (($i & 0600) != 0600) {
- die "core.sharedRepository mode invalid: ".
- sprintf('%.3o', $i) . "\nOwner must have permissions\n";
- }
- ($i & 0666);
-}
-
-sub _umask_for {
- my ($perm) = @_; # _git_config_perm return value
- my $rv = $perm;
- return umask if $rv == 0;
-
- # set +x bit if +r or +w were set
- $rv |= 0100 if ($rv & 0600);
- $rv |= 0010 if ($rv & 0060);
- $rv |= 0001 if ($rv & 0006);
- (~$rv & 0777);
-}
-
-sub with_umask {
- my ($self, $cb, @arg) = @_;
- my $old = umask($self->{umask} //= umask_prepare($self));
- my $rv = eval { $cb->(@arg) };
- my $err = $@;
- umask $old;
- die $err if $err;
- $rv;
-}
-
-sub umask_prepare {
- my ($self) = @_;
- my $perm = _git_config_perm($self);
- _umask_for($perm);
-}
-
sub cleanup ($) {
delete @{$_[0]}{qw(over mm git search)};
}
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 699af432..69ab30e6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -9,7 +9,8 @@
package PublicInbox::SearchIdx;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::Search PublicInbox::Lock Exporter);
+use parent qw(PublicInbox::Search PublicInbox::Lock PublicInbox::Umask
+ Exporter);
use PublicInbox::Eml;
use PublicInbox::Search qw(xap_terms);
use PublicInbox::InboxWritable;
@@ -821,7 +822,8 @@ sub unindex_both { # git->cat_async callback
sub with_umask {
my $self = shift;
- ($self->{ibx} // $self->{eidx})->with_umask(@_);
+ my $owner = $self->{ibx} // $self->{eidx};
+ $owner ? $owner->with_umask(@_) : $self->SUPER::with_umask(@_)
}
# called by public-inbox-index
diff --git a/lib/PublicInbox/Umask.pm b/lib/PublicInbox/Umask.pm
new file mode 100644
index 00000000..00772ce5
--- /dev/null
+++ b/lib/PublicInbox/Umask.pm
@@ -0,0 +1,70 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# base class to ensures Xapian||SQLite files respect core.sharedRepository
+# of git repos
+package PublicInbox::Umask;
+use v5.12;
+use PublicInbox::OnDestroy;
+
+use constant {
+ PERM_UMASK => 0,
+ OLD_PERM_GROUP => 1,
+ OLD_PERM_EVERYBODY => 2,
+ PERM_GROUP => 0660,
+ PERM_EVERYBODY => 0664,
+};
+
+sub _read_git_config_perm {
+ my ($self) = @_;
+ chomp(my $perm = $self->git->qx('config', 'core.sharedRepository'));
+ $perm;
+}
+
+sub _git_config_perm {
+ my $self = shift;
+ my $perm = scalar @_ ? $_[0] : _read_git_config_perm($self);
+ $perm //= '';
+ return PERM_UMASK if $perm eq '' || $perm eq 'umask';
+ return PERM_GROUP if $perm eq 'group';
+ return PERM_EVERYBODY if $perm =~ /\A(?:all|world|everybody)\z/;
+ return PERM_GROUP if ($perm =~ /\A(?:true|yes|on|1)\z/);
+ return PERM_UMASK if ($perm =~ /\A(?:false|no|off|0)\z/);
+
+ my $i = oct($perm);
+ return PERM_UMASK if $i == PERM_UMASK;
+ return PERM_GROUP if $i == OLD_PERM_GROUP;
+ return PERM_EVERYBODY if $i == OLD_PERM_EVERYBODY;
+
+ if (($i & 0600) != 0600) {
+ die "core.sharedRepository mode invalid: ".
+ sprintf('%.3o', $i) . "\nOwner must have permissions\n";
+ }
+ ($i & 0666);
+}
+
+sub _umask_for {
+ my ($perm) = @_; # _git_config_perm return value
+ my $rv = $perm;
+ return umask if $rv == 0;
+
+ # set +x bit if +r or +w were set
+ $rv |= 0100 if ($rv & 0600);
+ $rv |= 0010 if ($rv & 0060);
+ $rv |= 0001 if ($rv & 0006);
+ (~$rv & 0777);
+}
+
+sub with_umask {
+ my ($self, $cb, @arg) = @_;
+ my $old = umask($self->{umask} //= umask_prepare($self));
+ my $restore = PublicInbox::OnDestroy->new($$, \&CORE::umask, $old);
+ $cb ? $cb->(@arg) : $restore;
+}
+
+sub umask_prepare {
+ my ($self) = @_;
+ _umask_for(_git_config_perm($self));
+}
+
+1;
diff --git a/t/search.t b/t/search.t
index cf639a6d..ea5a2a5a 100644
--- a/t/search.t
+++ b/t/search.t
@@ -41,8 +41,9 @@ sub oct_is ($$$) {
{
# git repository perms
+ use_ok 'PublicInbox::Umask';
oct_is($ibx->_git_config_perm(),
- &PublicInbox::InboxWritable::PERM_GROUP,
+ PublicInbox::Umask::PERM_GROUP(),
'undefined permission is group');
my @t = (
[ '0644', 0022, '644 => umask(0022)' ],
@@ -54,8 +55,8 @@ sub oct_is ($$$) {
);
for (@t) {
my ($perm, $exp, $msg) = @$_;
- my $got = PublicInbox::InboxWritable::_umask_for(
- PublicInbox::InboxWritable->_git_config_perm($perm));
+ my $got = PublicInbox::Umask::_umask_for(
+ PublicInbox::Umask->_git_config_perm($perm));
oct_is($got, $exp, $msg);
}
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
` (2 preceding siblings ...)
2023-04-07 12:40 ` [PATCH 3/6] umask: hoist out of InboxWritable Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
2023-04-07 12:40 ` [PATCH 5/6] searchidx: use vstring to improve readability Eric Wong
2023-04-07 12:40 ` [PATCH 6/6] switch git version comparisons to vstrings, too Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
This lets us get rid of some awkwardness around the old API
and single-use subroutines while saving us some LoC.
---
lib/PublicInbox/ExtSearchIdx.pm | 14 +++++--------
lib/PublicInbox/SearchIdx.pm | 19 ++++++------------
lib/PublicInbox/V2Writable.pm | 23 +++++++++-------------
lib/PublicInbox/Xapcmd.pm | 35 +++++++++++++++------------------
script/public-inbox-convert | 9 ++++-----
5 files changed, 40 insertions(+), 60 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 6f3711ba..6856ae66 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -1179,12 +1179,6 @@ sub update_last_commit { # overrides V2Writable
$self->{oidx}->eidx_meta($meta_key, $latest_cmt);
}
-sub _idx_init { # with_umask callback
- my ($self, $opt) = @_;
- PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock
- $self->{midx} = PublicInbox::MiscIdx->new($self);
-}
-
sub symlink_packs ($$) {
my ($ibx, $pd) = @_;
my $ret = 0;
@@ -1270,15 +1264,17 @@ sub idx_init { # similar to V2Writable
}
($has_new || $prune_nr || $new ne '') and
$self->{mg}->write_alternates($mode, $alt, $new);
- $git_midx and $self->with_umask(sub {
+ my $restore = $self->with_umask;
+ if ($git_midx) {
my @cmd = ('multi-pack-index');
push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1;
my $lk = $self->lock_for_scope;
system('git', "--git-dir=$ALL", @cmd, 'write');
# ignore errors, fairly new command, may not exist
- });
+ }
$self->parallel_init($self->{indexlevel});
- $self->with_umask(\&_idx_init, $self, $opt);
+ PublicInbox::V2Writable::_idx_init($self, $opt); # acquires ei.lock
+ $self->{midx} = PublicInbox::MiscIdx->new($self);
$self->{oidx}->begin_lazy;
$self->{oidx}->eidx_prep;
$self->{midx}->create_xdb if $new ne '';
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 69ab30e6..511dd4d6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -1110,8 +1110,10 @@ sub DESTROY {
$_[0]->{lockfh} = undef;
}
-sub _begin_txn {
+sub begin_txn_lazy {
my ($self) = @_;
+ return if $self->{txn};
+ my $restore = $self->with_umask;
my $xdb = $self->{xdb} || idx_acquire($self);
$self->{oidx}->begin_lazy if $self->{oidx};
$xdb->begin_transaction if $xdb;
@@ -1119,11 +1121,6 @@ sub _begin_txn {
$xdb;
}
-sub begin_txn_lazy {
- my ($self) = @_;
- $self->with_umask(\&_begin_txn, $self) if !$self->{txn};
-}
-
# store 'indexlevel=medium' in v2 shard=0 and v1 (only one shard)
# This metadata is read by InboxWritable->detect_indexlevel:
sub set_metadata_once {
@@ -1147,8 +1144,10 @@ sub set_metadata_once {
}
}
-sub _commit_txn {
+sub commit_txn_lazy {
my ($self) = @_;
+ return unless delete($self->{txn});
+ my $restore = $self->with_umask;
if (my $eidx = $self->{eidx}) {
$eidx->git->async_wait_all;
$eidx->{transact_bytes} = 0;
@@ -1160,12 +1159,6 @@ sub _commit_txn {
$self->{oidx}->commit_lazy if $self->{oidx};
}
-sub commit_txn_lazy {
- my ($self) = @_;
- delete($self->{txn}) and
- $self->with_umask(\&_commit_txn, $self);
-}
-
sub eidx_shard_new {
my ($class, $eidx, $shard) = @_;
my $self = bless {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index d3d13941..856442a9 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -89,13 +89,6 @@ sub init_inbox {
$self->done;
}
-# returns undef on duplicate or spam
-# mimics Import::add and wraps it for v2
-sub add {
- my ($self, $eml, $check_cb) = @_;
- $self->{ibx}->with_umask(\&_add, $self, $eml, $check_cb);
-}
-
sub idx_shard ($$) {
my ($self, $num) = @_;
$self->{idx_shards}->[$num % scalar(@{$self->{idx_shards}})];
@@ -113,8 +106,11 @@ sub do_idx ($$$) {
$n >= $self->{batch_bytes};
}
-sub _add {
+# returns undef on duplicate or spam
+# mimics Import::add and wraps it for v2
+sub add {
my ($self, $mime, $check_cb) = @_;
+ my $restore = $self->{ibx}->with_umask;
# spam check:
if ($check_cb) {
@@ -391,17 +387,16 @@ sub rewrite_internal ($$;$$$) {
# (retval[2]) is not part of the stable API shared with Import->remove
sub remove {
my ($self, $eml, $cmt_msg) = @_;
- my $r = $self->{ibx}->with_umask(\&rewrite_internal,
- $self, $eml, $cmt_msg);
+ my $restore = $self->{ibx}->with_umask;
+ my $r = rewrite_internal($self, $eml, $cmt_msg);
defined($r) && defined($r->[0]) ? @$r: undef;
}
sub _replace ($$;$$) {
my ($self, $old_eml, $new_eml, $sref) = @_;
- my $arg = [ $self, $old_eml, undef, $new_eml, $sref ];
- my $rewritten = $self->{ibx}->with_umask(\&rewrite_internal,
- $self, $old_eml, undef, $new_eml, $sref) or return;
-
+ my $restore = $self->{ibx}->with_umask;
+ my $rewritten = rewrite_internal($self, $old_eml, undef,
+ $new_eml, $sref) or return;
my $rewrites = $rewritten->{rewrites};
# ->done is called if there are rewrites since we gc+prune from git
$self->idx_init if @$rewrites;
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 10685636..c87baa7b 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -256,24 +256,6 @@ sub prepare_run {
sub check_compact () { runnable_or_die($XAPIAN_COMPACT) }
-sub _run { # with_umask callback
- my ($ibx, $cb, $opt) = @_;
- my $im = $ibx->can('importer') ? $ibx->importer(0) : undef;
- ($im // $ibx)->lock_acquire;
- my ($tmp, $queue) = prepare_run($ibx, $opt);
-
- # fine-grained locking if we prepare for reindex
- if (!$opt->{-coarse_lock}) {
- prepare_reindex($ibx, $opt);
- ($im // $ibx)->lock_release;
- }
-
- $ibx->cleanup if $ibx->can('cleanup');
- process_queue($queue, $cb, $opt);
- ($im // $ibx)->lock_acquire if !$opt->{-coarse_lock};
- commit_changes($ibx, $im, $tmp, $opt);
-}
-
sub run {
my ($ibx, $task, $opt) = @_; # task = 'cpdb' or 'compact'
my $cb = \&$task;
@@ -296,7 +278,22 @@ sub run {
local @SIG{keys %SIG} = values %SIG;
setup_signals();
- $ibx->with_umask(\&_run, $ibx, $cb, $opt);
+ my $restore = $ibx->with_umask;
+
+ my $im = $ibx->can('importer') ? $ibx->importer(0) : undef;
+ ($im // $ibx)->lock_acquire;
+ my ($tmp, $queue) = prepare_run($ibx, $opt);
+
+ # fine-grained locking if we prepare for reindex
+ if (!$opt->{-coarse_lock}) {
+ prepare_reindex($ibx, $opt);
+ ($im // $ibx)->lock_release;
+ }
+
+ $ibx->cleanup if $ibx->can('cleanup');
+ process_queue($queue, $cb, $opt);
+ ($im // $ibx)->lock_acquire if !$opt->{-coarse_lock};
+ commit_changes($ibx, $im, $tmp, $opt);
}
sub cpdb_retryable ($$) {
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index 750adca4..96931cbf 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -89,7 +89,8 @@ sub link_or_copy ($$) {
File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n";
}
-$old->with_umask(sub {
+{
+ my $restore = $old->with_umask;
my $old_cfg = "$old->{inboxdir}/config";
local $ENV{GIT_CONFIG} = $old_cfg;
my $new_cfg = "$new->{inboxdir}/all.git/config";
@@ -110,12 +111,10 @@ $old->with_umask(sub {
my $desc = "$old->{inboxdir}/description";
link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc;
my $clone = "$old->{inboxdir}/cloneurl";
- if (-e $clone) {
- warn <<"";
+ warn <<"" if -e $clone;
$clone may not be valid after migrating to v2, not copying
- }
-});
+}
my $state = '';
my $head = $old->{ref_head} || 'HEAD';
my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head);
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/6] searchidx: use vstring to improve readability
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
` (3 preceding siblings ...)
2023-04-07 12:40 ` [PATCH 4/6] umask: rely on the OnDestroy-based call where applicable Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
2023-04-07 12:40 ` [PATCH 6/6] switch git version comparisons to vstrings, too Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
Perl has native `vstring' encoding for vector (or version)
strings, make use of it instead of relying on difficult-to-read
hex versions and integer shifts.
---
lib/PublicInbox/SearchIdx.pm | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 511dd4d6..496cea05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -114,15 +114,15 @@ sub load_xapian_writable () {
*sortable_serialise = $xap.'::sortable_serialise';
$DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()');
$DB_OPEN = eval($xap.'::DB_OPEN()');
- my $ver = (eval($xap.'::major_version()') << 16) |
- (eval($xap.'::minor_version()') << 8) |
- eval($xap.'::revision()');
- if ($ver >= 0x10400) {
+ my $ver = eval 'v'.join('.', eval($xap.'::major_version()'),
+ eval($xap.'::minor_version()'),
+ eval($xap.'::revision()'));
+ if ($ver ge 1.4) { # new flags in Xapian 1.4
$DB_NO_SYNC = 0x4;
$DB_DANGEROUS = 0x10;
}
# Xapian v1.2.21..v1.2.24 were missing close-on-exec on OFD locks
- $X->{CLOEXEC_UNSET} = 1 if $ver >= 0x010215 && $ver <= 0x010218;
+ $X->{CLOEXEC_UNSET} = 1 if $ver ge v1.2.21 && $ver le v1.2.24;
1;
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 6/6] switch git version comparisons to vstrings, too
2023-04-07 12:40 [PATCH 0/6] cindex fixes, and some spring cleaning Eric Wong
` (4 preceding siblings ...)
2023-04-07 12:40 ` [PATCH 5/6] searchidx: use vstring to improve readability Eric Wong
@ 2023-04-07 12:40 ` Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2023-04-07 12:40 UTC (permalink / raw)
To: meta
There's too many require_git callsites in t/*.t to change,
but we can make the rest of the code more readable and reuse
PublicInbox::Git::version() in our test suite, too.
---
lib/PublicInbox/Git.pm | 11 +++++------
lib/PublicInbox/LeiMirror.pm | 2 +-
lib/PublicInbox/TestCommon.pm | 16 ++++++----------
3 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 6f8232cf..e35d5277 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -33,7 +33,7 @@ our $async_warn; # true in read-only daemons
# 65: SHA-256 hex size + "\n" in preparation for git using non-SHA1
use constant {
MAX_INFLIGHT => 512 * 3 / (65 + length('contents ')),
- BATCH_CMD_VER => (2 << 24 | 36 << 16), # git 2.36+
+ BATCH_CMD_VER => v2.36.0, # git 2.36+
};
my %GIT_ESC = (
@@ -67,8 +67,7 @@ sub check_git_exe () {
close($rd) or die "$GIT_EXE --version: $?";
$v =~ /\b([0-9]+(?:\.[0-9]+){2})/ or die
"$GIT_EXE --version output: $v # unparseable";
- my @v = split(/\./, $1, 3);
- $GIT_VER = ($v[0] << 24) | ($v[1] << 16) | $v[2];
+ $GIT_VER = eval("v$1") // die "BUG: bad vstring: $1 ($v)";
$EXE_ST = $st;
}
}
@@ -159,7 +158,7 @@ sub _bidi_pipe {
# git 2.31.0+ supports -c core.abbrev=no, don't bother with
# core.abbrev=64 since not many releases had SHA-256 prior to 2.31
- my $abbr = $GIT_VER < (2 << 24 | 31 << 16) ? 40 : 'no';
+ my $abbr = $GIT_VER lt v2.31.0 ? 40 : 'no';
my @cmd = ($GIT_EXE, "--git-dir=$gd", '-c', "core.abbrev=$abbr",
'cat-file', "--$batch");
if ($err) {
@@ -316,7 +315,7 @@ sub cat_async_wait ($) {
sub batch_prepare ($) {
my ($self) = @_;
check_git_exe();
- if ($GIT_VER >= BATCH_CMD_VER) {
+ if ($GIT_VER ge BATCH_CMD_VER) {
_bidi_pipe($self, qw(batch-command in out pid err_c));
$self->{-bc} = 1;
} else {
@@ -370,7 +369,7 @@ sub check_async_begin ($) {
die 'BUG: already in async check' if $self->{inflight_c};
cleanup($self) if alternates_changed($self);
check_git_exe();
- if ($GIT_VER >= BATCH_CMD_VER) {
+ if ($GIT_VER ge BATCH_CMD_VER) {
_bidi_pipe($self, qw(batch-command in out pid err_c));
$self->{-bc} = 1;
$self->{inflight} = [];
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index e0709fbd..8f749688 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -276,7 +276,7 @@ sub fetch_args ($$) {
($lei->{opt}->{jobs} // 1) > 1;
push @cmd, '-v' if $lei->{opt}->{verbose};
push(@cmd, '-p') if $lei->{opt}->{prune};
- PublicInbox::Git::version() >= ((2 << 24) | (29 << 16)) and
+ PublicInbox::Git::version() ge v2.29.0 and
push(@cmd, '--no-write-fetch-head');
@cmd;
}
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index 494323c0..aa2abc43 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -10,6 +10,7 @@ use Fcntl qw(F_SETFD :seek);
use POSIX qw(dup2);
use IO::Socket::INET;
use File::Spec;
+use Scalar::Util qw(isvstring);
our @EXPORT;
my $lei_loud = $ENV{TEST_LEI_ERR_LOUD};
my $tail_cmd = $ENV{TAIL};
@@ -109,17 +110,12 @@ sub have_xapian_compact (;$) {
sub require_git ($;$) {
my ($req, $nr) = @_;
- state ($cur_int, $cur_ver);
- $cur_int //= do {
- chomp($cur_ver = xqx([qw(git --version)]));
- my @v = ($cur_ver =~ /version (\d+)\.(\d+)(?:\.(\d+))?/);
- ($v[0] << 24) | ($v[1] << 16) | ($v[2] // 0);
- };
-
- my ($req_maj, $req_min, $req_sub) = split(/\./, $req);
- my $req_int = ($req_maj << 24) | ($req_min << 16) | ($req_sub // 0);
+ require PublicInbox::Git;
+ state $cur_vstr = PublicInbox::Git::version();
+ $req = eval("v$req") unless isvstring($req);
- return 1 if $cur_int >= $req_int;
+ return 1 if $cur_vstr ge $req;
+ state $cur_ver = sprintf('%vd', $cur_vstr);
return plan skip_all => "git $req+ required, have $cur_ver" if !$nr;
defined(wantarray) ? undef :
skip("git $req+ required (have $cur_ver), skipping $nr tests")
^ permalink raw reply related [flat|nested] 7+ messages in thread