From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-2.7 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_SBL,URIBL_SBL_A shortcircuit=no autolearn=no autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8EB841F934 for ; Sun, 20 Dec 2020 06:30:16 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/6] script/public-inbox-*: favor caller-provided pathnames Date: Sun, 20 Dec 2020 06:30:11 +0000 Message-Id: <20201220063016.20959-2-e@80x24.org> In-Reply-To: <20201220063016.20959-1-e@80x24.org> References: <20201220063016.20959-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll try to avoid calling Cwd::abs_path and use File::Spec->rel2abs instead, since abs_path will resolve symlinks the user specified on the command-line. Unfortunately, ->rel2abs still leaves "/.." and "/../" uncollapsed, so we still need to fall back to Cwd::abs_path in those cases. While we are at it, we'll also resolve inboxdir from deep inside v2 directories instead of misdetecting them as v1 bare git repos. In any case, stop matching directories by name and instead rely on the unique combination of st_dev + st_ino on stat() as we started doing in the extindex code. --- lib/PublicInbox/Admin.pm | 102 ++++++++++++++++++++++++------------ script/public-inbox-convert | 27 +++------- script/public-inbox-init | 10 +--- t/admin.t | 16 +++--- 4 files changed, 87 insertions(+), 68 deletions(-) diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 3977d812..ea82133a 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -6,15 +6,15 @@ package PublicInbox::Admin; use strict; use parent qw(Exporter); -use Cwd qw(abs_path); -use POSIX (); our @EXPORT_OK = qw(setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); +use File::Spec (); sub setup_signals { my ($cb, $arg) = @_; # optional + require POSIX; # we call exit() here instead of _exit() so DESTROY methods # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) @@ -27,21 +27,43 @@ sub setup_signals { }; } +# abs_path resolves symlinks, so we want to avoid it if rel2abs +# is sufficient and doesn't leave "/.." or "/../" +sub rel2abs_collapsed ($) { + my $p = File::Spec->rel2abs($_[0]); + return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; # likely + require Cwd; + Cwd::abs_path($p); +} + sub resolve_inboxdir { my ($cd, $ver) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - $$ver = 2 if $ver; - return abs_path($prefix); + my $try = $cd // '.'; + my $root_dev_ino; + while (1) { # favor v2, first + if (-f "$try/inbox.lock") { + $$ver = 2 if $ver; + return rel2abs_collapsed($try); + } elsif (-d $try) { + my @try = stat _; + $root_dev_ino //= do { + my @root = stat('/') or die "stat /: $!\n"; + "$root[0]\0$root[1]"; + }; + last if "$try[0]\0$try[1]" eq $root_dev_ino; + $try .= '/..'; # continue, cd up + } else { + die "`$try' is not a directory\n"; + } } + # try v1 bare git dirs my $cmd = [ qw(git rev-parse --git-dir) ]; my $fh = popen_rd($cmd, undef, {-C => $cd}); my $dir = do { local $/; <$fh> }; - close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n"; + close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; chomp $dir; $$ver = 1 if $ver; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); + rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } # for unconfigured inboxes @@ -78,8 +100,8 @@ sub unconfigured_ibx ($$) { name => $name, address => [ "$name\@example.com" ], inboxdir => $dir, - # TODO: consumers may want to warn on this: - #-unconfigured => 1, + # consumers (-convert) warn on this: + -unconfigured => 1, }); } @@ -95,41 +117,53 @@ sub resolve_inboxes ($;$$) { } my $min_ver = $opt->{-min_inbox_version} || 0; + # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, + # pathnames are not unique due to symlinks and bind mounts my (@old, @ibxs); - my %dir2ibx; - my $all = $opt->{all} ? [] : undef; - if ($cfg) { + if ($opt->{all}) { $cfg->each_inbox(sub { my ($ibx) = @_; - my $path = abs_path($ibx->{inboxdir}); - if (defined($path)) { - $dir2ibx{$path} = $ibx; - push @$all, $ibx if $all; + if (-e $ibx->{inboxdir}) { + push(@ibxs, $ibx) if $ibx->version >= $min_ver; } else { - warn <{name} $ibx->{inboxdir}: $! -EOF + warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); - } - if ($all) { - @$all = grep { $_->version >= $min_ver } @$all; - @ibxs = @$all; } else { # directories specified on the command-line - my $i = 0; my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; - foreach (@dirs) { - my $v; - my $dir = resolve_inboxdir($_, \$v); - if ($v < $min_ver) { + my %s2i; # "st_dev\0st_ino" => array index + for (my $i = 0; $i <= $#dirs; $i++) { + my $dir = $dirs[$i]; + my @st = stat($dir) or die "stat($dir): $!\n"; + $dir = resolve_inboxdir($dir, \(my $ver)); + if ($ver >= $min_ver) { + $s2i{"$st[0]\0$st[1]"} //= $i; + } else { push @old, $dir; - next; } - my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i); - $i++; - push @ibxs, $ibx; } + my $done = \'done'; + eval { + $cfg->each_inbox(sub { + my ($ibx) = @_; + return if $ibx->version < $min_ver; + my $dir = $ibx->{inboxdir}; + if (my @s = stat $dir) { + my $i = delete($s2i{"$s[0]\0$s[1]"}) + // return; + $ibxs[$i] = $ibx; + die $done if !keys(%s2i); + } else { + warn "W: $ibx->{name} $dir: $!\n"; + } + }); + }; + die $@ if $@ && $@ ne $done; + for my $i (sort { $a <=> $b } values %s2i) { + $ibxs[$i] = unconfigured_ibx($dirs[$i], $i); + } + @ibxs = grep { defined } @ibxs; # duplicates are undef } if (@old) { die "-V$min_ver inboxes not supported by $0\n\t", diff --git a/script/public-inbox-convert b/script/public-inbox-convert index b61c743f..fbd527a6 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -47,34 +47,21 @@ die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require Cwd; -Cwd->import('abs_path'); +require PublicInbox::Admin; require PublicInbox::Config; require PublicInbox::InboxWritable; -my $abs = abs_path($old_dir); -die "failed to resolve $old_dir: $!\n" if (!defined($abs)); - my $cfg = PublicInbox::Config->new; -my $old; -$cfg->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; -}); -if ($old) { - $old = PublicInbox::InboxWritable->new($old); -} else { +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; - $old = PublicInbox::InboxWritable->new({ - inboxdir => $old_dir, - name => 'ignored', - -primary_address => 'old@example.com', - address => [ 'old@example.com' ], - }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require File::Spec; require PublicInbox::Admin; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; @@ -88,7 +75,7 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = File::Spec->canonpath($new_dir); +$new->{inboxdir} = PublicInbox::Admin::rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; diff --git a/script/public-inbox-init b/script/public-inbox-init index c775eb31..eb605a51 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -138,10 +138,9 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -require File::Spec; -$inboxdir = File::Spec->canonpath($inboxdir); +PublicInbox::Admin::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; @@ -186,11 +185,6 @@ if ($skip_docdata) { $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); -require Cwd; -my $tmp = Cwd::abs_path($inboxdir); -defined($tmp) or die "failed to resolve $inboxdir: $!\n"; -$inboxdir = $tmp; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; # needed for git prior to v2.1.0 umask(0077) if defined $perm; diff --git a/t/admin.t b/t/admin.t index af132577..60c6037d 100644 --- a/t/admin.t +++ b/t/admin.t @@ -12,10 +12,7 @@ my $v2_dir = "$tmpdir/v2"; my ($res, $err, $v); PublicInbox::Import::init_bare($git_dir); -*resolve_inboxdir = do { - no warnings 'once'; - *PublicInbox::Admin::resolve_inboxdir; -}; +*resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir; # v1 is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); @@ -72,16 +69,23 @@ SKIP: { ok(-e "$v2_dir/inbox.lock", 'exists'); is(resolve_inboxdir($v2_dir), $v2_dir, 'resolve_inboxdir works on v2_dir'); - ok(chdir($v2_dir), 'chdir v2_dir OK'); + chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!"; is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir'); $res = resolve_inboxdir(undef, \$v); is($v, 2, 'version 2 detected'); is($res, $v2_dir, 'detects directory along with version'); # TODO: should work from inside Xapian dirs, and git dirs, here... + PublicInbox::Import::init_bare("$v2_dir/git/0.git"); + my $objdir = "$v2_dir/git/0.git/objects"; + is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir'); + is($v, 2, 'version 2 detected at $objdir'); + chdir($objdir) or BAIL_OUT "chdir objdir: $!"; + is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir'); + is($v, 2, 'version 2 detected inside $objdir'); } -chdir '/'; +chdir '/' or BAIL_OUT "chdir: $!"; my @pairs = ( '1g' => 1024 ** 3,