unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/6] script/public-inbox-*: favor caller-provided pathnames
Date: Sun, 20 Dec 2020 06:30:11 +0000	[thread overview]
Message-ID: <20201220063016.20959-2-e@80x24.org> (raw)
In-Reply-To: <20201220063016.20959-1-e@80x24.org>

We'll try to avoid calling Cwd::abs_path and use
File::Spec->rel2abs instead, since abs_path will resolve
symlinks the user specified on the command-line.

Unfortunately, ->rel2abs still leaves "/.." and "/../"
uncollapsed, so we still need to fall back to Cwd::abs_path in
those cases.

While we are at it, we'll also resolve inboxdir from deep inside
v2 directories instead of misdetecting them as v1 bare git
repos.

In any case, stop matching directories by name and instead rely
on the unique combination of st_dev + st_ino on stat() as we
started doing in the extindex code.
---
 lib/PublicInbox/Admin.pm    | 102 ++++++++++++++++++++++++------------
 script/public-inbox-convert |  27 +++-------
 script/public-inbox-init    |  10 +---
 t/admin.t                   |  16 +++---
 4 files changed, 87 insertions(+), 68 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 3977d812..ea82133a 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -6,15 +6,15 @@
 package PublicInbox::Admin;
 use strict;
 use parent qw(Exporter);
-use Cwd qw(abs_path);
-use POSIX ();
 our @EXPORT_OK = qw(setup_signals);
 use PublicInbox::Config;
 use PublicInbox::Inbox;
 use PublicInbox::Spawn qw(popen_rd);
+use File::Spec ();
 
 sub setup_signals {
 	my ($cb, $arg) = @_; # optional
+	require POSIX;
 
 	# we call exit() here instead of _exit() so DESTROY methods
 	# get called (e.g. File::Temp::Dir and PublicInbox::Msgmap)
@@ -27,21 +27,43 @@ sub setup_signals {
 	};
 }
 
+# abs_path resolves symlinks, so we want to avoid it if rel2abs
+# is sufficient and doesn't leave "/.." or "/../"
+sub rel2abs_collapsed ($) {
+	my $p = File::Spec->rel2abs($_[0]);
+	return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; # likely
+	require Cwd;
+	Cwd::abs_path($p);
+}
+
 sub resolve_inboxdir {
 	my ($cd, $ver) = @_;
-	my $prefix = defined $cd ? $cd : './';
-	if (-d $prefix && -f "$prefix/inbox.lock") { # v2
-		$$ver = 2 if $ver;
-		return abs_path($prefix);
+	my $try = $cd // '.';
+	my $root_dev_ino;
+	while (1) { # favor v2, first
+		if (-f "$try/inbox.lock") {
+			$$ver = 2 if $ver;
+			return rel2abs_collapsed($try);
+		} elsif (-d $try) {
+			my @try = stat _;
+			$root_dev_ino //= do {
+				my @root = stat('/') or die "stat /: $!\n";
+				"$root[0]\0$root[1]";
+			};
+			last if "$try[0]\0$try[1]" eq $root_dev_ino;
+			$try .= '/..'; # continue, cd up
+		} else {
+			die "`$try' is not a directory\n";
+		}
 	}
+	# try v1 bare git dirs
 	my $cmd = [ qw(git rev-parse --git-dir) ];
 	my $fh = popen_rd($cmd, undef, {-C => $cd});
 	my $dir = do { local $/; <$fh> };
-	close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n";
+	close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n";
 	chomp $dir;
 	$$ver = 1 if $ver;
-	return abs_path($cd) if ($dir eq '.' && defined $cd);
-	abs_path($dir);
+	rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir);
 }
 
 # for unconfigured inboxes
@@ -78,8 +100,8 @@ sub unconfigured_ibx ($$) {
 		name => $name,
 		address => [ "$name\@example.com" ],
 		inboxdir => $dir,
-		# TODO: consumers may want to warn on this:
-		#-unconfigured => 1,
+		# consumers (-convert) warn on this:
+		-unconfigured => 1,
 	});
 }
 
@@ -95,41 +117,53 @@ sub resolve_inboxes ($;$$) {
 	}
 
 	my $min_ver = $opt->{-min_inbox_version} || 0;
+	# lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames,
+	# pathnames are not unique due to symlinks and bind mounts
 	my (@old, @ibxs);
-	my %dir2ibx;
-	my $all = $opt->{all} ? [] : undef;
-	if ($cfg) {
+	if ($opt->{all}) {
 		$cfg->each_inbox(sub {
 			my ($ibx) = @_;
-			my $path = abs_path($ibx->{inboxdir});
-			if (defined($path)) {
-				$dir2ibx{$path} = $ibx;
-				push @$all, $ibx if $all;
+			if (-e $ibx->{inboxdir}) {
+				push(@ibxs, $ibx) if $ibx->version >= $min_ver;
 			} else {
-				warn <<EOF;
-W: $ibx->{name} $ibx->{inboxdir}: $!
-EOF
+				warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n";
 			}
 		});
-	}
-	if ($all) {
-		@$all = grep { $_->version >= $min_ver } @$all;
-		@ibxs = @$all;
 	} else { # directories specified on the command-line
-		my $i = 0;
 		my @dirs = @$argv;
 		push @dirs, '.' if !@dirs && $opt->{-use_cwd};
-		foreach (@dirs) {
-			my $v;
-			my $dir = resolve_inboxdir($_, \$v);
-			if ($v < $min_ver) {
+		my %s2i; # "st_dev\0st_ino" => array index
+		for (my $i = 0; $i <= $#dirs; $i++) {
+			my $dir = $dirs[$i];
+			my @st = stat($dir) or die "stat($dir): $!\n";
+			$dir = resolve_inboxdir($dir, \(my $ver));
+			if ($ver >= $min_ver) {
+				$s2i{"$st[0]\0$st[1]"} //= $i;
+			} else {
 				push @old, $dir;
-				next;
 			}
-			my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i);
-			$i++;
-			push @ibxs, $ibx;
 		}
+		my $done = \'done';
+		eval {
+			$cfg->each_inbox(sub {
+				my ($ibx) = @_;
+				return if $ibx->version < $min_ver;
+				my $dir = $ibx->{inboxdir};
+				if (my @s = stat $dir) {
+					my $i = delete($s2i{"$s[0]\0$s[1]"})
+						// return;
+					$ibxs[$i] = $ibx;
+					die $done if !keys(%s2i);
+				} else {
+					warn "W: $ibx->{name} $dir: $!\n";
+				}
+			});
+		};
+		die $@ if $@ && $@ ne $done;
+		for my $i (sort { $a <=> $b } values %s2i) {
+			$ibxs[$i] = unconfigured_ibx($dirs[$i], $i);
+		}
+		@ibxs = grep { defined } @ibxs; # duplicates are undef
 	}
 	if (@old) {
 		die "-V$min_ver inboxes not supported by $0\n\t",
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index b61c743f..fbd527a6 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -47,34 +47,21 @@ die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq '');
 die "$new_dir exists\n" if -d $new_dir;
 die "$old_dir not a directory\n" unless -d $old_dir;
 
-require Cwd;
-Cwd->import('abs_path');
+require PublicInbox::Admin;
 require PublicInbox::Config;
 require PublicInbox::InboxWritable;
 
-my $abs = abs_path($old_dir);
-die "failed to resolve $old_dir: $!\n" if (!defined($abs));
-
 my $cfg = PublicInbox::Config->new;
-my $old;
-$cfg->each_inbox(sub {
-	$old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir;
-});
-if ($old) {
-	$old = PublicInbox::InboxWritable->new($old);
-} else {
+my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg);
+@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n",
+		map { "\t$_->{inboxdir}\n" } @old;
+my $old = PublicInbox::InboxWritable->new($old[0]);
+if (delete $old->{-unconfigured}) {
 	warn "W: $old_dir not configured in " .
 		PublicInbox::Config::default_file() . "\n";
-	$old = PublicInbox::InboxWritable->new({
-		inboxdir => $old_dir,
-		name => 'ignored',
-		-primary_address => 'old@example.com',
-		address => [ 'old@example.com' ],
-	});
 }
 die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2;
 
-require File::Spec;
 require PublicInbox::Admin;
 my $detected = PublicInbox::Admin::detect_indexlevel($old);
 $old->{indexlevel} //= $detected;
@@ -88,7 +75,7 @@ if ($opt->{'index'}) {
 }
 local %ENV = (%$env, %ENV) if $env;
 my $new = { %$old };
-$new->{inboxdir} = File::Spec->canonpath($new_dir);
+$new->{inboxdir} = PublicInbox::Admin::rel2abs_collapsed($new_dir);
 $new->{version} = 2;
 $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} });
 $new->{-no_fsync} = 1 if !$opt->{fsync};
diff --git a/script/public-inbox-init b/script/public-inbox-init
index c775eb31..eb605a51 100755
--- a/script/public-inbox-init
+++ b/script/public-inbox-init
@@ -138,10 +138,9 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n";
 my $pfx = "publicinbox.$name";
 my @x = (qw/git config/, "--file=$pi_config_tmp");
 
-require File::Spec;
-$inboxdir = File::Spec->canonpath($inboxdir);
+PublicInbox::Admin::rel2abs_collapsed($inboxdir);
+die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0;
 
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
 if (-f "$inboxdir/inbox.lock") {
 	if (!defined $version) {
 		$version = 2;
@@ -186,11 +185,6 @@ if ($skip_docdata) {
 	$ibx->{-skip_docdata} = $skip_docdata;
 }
 $ibx->init_inbox(0, $skip_epoch, $skip_artnum);
-require Cwd;
-my $tmp = Cwd::abs_path($inboxdir);
-defined($tmp) or die "failed to resolve $inboxdir: $!\n";
-$inboxdir = $tmp;
-die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s;
 
 # needed for git prior to v2.1.0
 umask(0077) if defined $perm;
diff --git a/t/admin.t b/t/admin.t
index af132577..60c6037d 100644
--- a/t/admin.t
+++ b/t/admin.t
@@ -12,10 +12,7 @@ my $v2_dir = "$tmpdir/v2";
 my ($res, $err, $v);
 
 PublicInbox::Import::init_bare($git_dir);
-*resolve_inboxdir = do {
-	no warnings 'once';
-	*PublicInbox::Admin::resolve_inboxdir;
-};
+*resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir;
 
 # v1
 is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved');
@@ -72,16 +69,23 @@ SKIP: {
 	ok(-e "$v2_dir/inbox.lock", 'exists');
 	is(resolve_inboxdir($v2_dir), $v2_dir,
 		'resolve_inboxdir works on v2_dir');
-	ok(chdir($v2_dir), 'chdir v2_dir OK');
+	chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!";
 	is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir');
 	$res = resolve_inboxdir(undef, \$v);
 	is($v, 2, 'version 2 detected');
 	is($res, $v2_dir, 'detects directory along with version');
 
 	# TODO: should work from inside Xapian dirs, and git dirs, here...
+	PublicInbox::Import::init_bare("$v2_dir/git/0.git");
+	my $objdir = "$v2_dir/git/0.git/objects";
+	is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir');
+	is($v, 2, 'version 2 detected at $objdir');
+	chdir($objdir) or BAIL_OUT "chdir objdir: $!";
+	is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir');
+	is($v, 2, 'version 2 detected inside $objdir');
 }
 
-chdir '/';
+chdir '/' or BAIL_OUT "chdir: $!";
 
 my @pairs = (
 	'1g' => 1024 ** 3,

  reply	other threads:[~2020-12-20  6:30 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-20  6:30 [PATCH 0/6] path usability and some tiny cleanups Eric Wong
2020-12-20  6:30 ` Eric Wong [this message]
2020-12-20  6:30 ` [PATCH 2/6] inboxidle: remove needless check for {inboxdir} Eric Wong
2020-12-20  6:30 ` [PATCH 3/6] daemon: lazy load Cwd only for --daemonize users Eric Wong
2020-12-20  6:30 ` [PATCH 4/6] daemon: unconditionally call IO::Handle::blocking(0) Eric Wong
2020-12-20  6:30 ` [PATCH 5/6] daemon: kill_workers: eliminate unnecessary loop Eric Wong
2020-12-20  6:30 ` [PATCH 6/6] config: eliminate unnecessary join call up front Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201220063016.20959-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).