unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/5] lei: All Local Externals: bare git dir for alternates
Date: Sat, 20 Mar 2021 19:04:03 +0900	[thread overview]
Message-ID: <20210320100407.15713-2-e@80x24.org> (raw)
In-Reply-To: <20210320100407.15713-1-e@80x24.org>

This will be used for keyword (and label) storage for externals.
We'll be using this to ensure we don't redundantly auto-import
messages into lei/store if they're already in a local external
(they can still be imported explicitly via "lei import").
---
 MANIFEST                       |  1 +
 lib/PublicInbox/LEI.pm         | 16 ++++++
 lib/PublicInbox/LeiALE.pm      | 98 ++++++++++++++++++++++++++++++++++
 lib/PublicInbox/LeiExternal.pm |  6 +++
 lib/PublicInbox/LeiOverview.pm |  3 +-
 lib/PublicInbox/LeiQuery.pm    |  5 ++
 lib/PublicInbox/LeiStore.pm    |  5 +-
 lib/PublicInbox/LeiToMail.pm   | 10 ++--
 lib/PublicInbox/LeiXSearch.pm  | 27 +---------
 lib/PublicInbox/Lock.pm        |  2 +-
 t/lei-externals.t              |  3 +-
 t/lei_xsearch.t                | 22 +++++++-
 12 files changed, 158 insertions(+), 40 deletions(-)
 create mode 100644 lib/PublicInbox/LeiALE.pm

diff --git a/MANIFEST b/MANIFEST
index 775de5cd..b6b4a3ab 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -179,6 +179,7 @@ lib/PublicInbox/InputPipe.pm
 lib/PublicInbox/Isearch.pm
 lib/PublicInbox/KQNotify.pm
 lib/PublicInbox/LEI.pm
+lib/PublicInbox/LeiALE.pm
 lib/PublicInbox/LeiAuth.pm
 lib/PublicInbox/LeiConvert.pm
 lib/PublicInbox/LeiCurl.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index d20ba744..0da26a32 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -97,6 +97,22 @@ sub _config_path ($) {
 		.'/lei/config');
 }
 
+sub cache_dir ($) { # path of the "lei" directory under the user's cache dir
+	my ($self) = @_;
+	rel2abs($self, ($self->{env}->{XDG_CACHE_HOME} // # prefer XDG_CACHE_HOME from {env}
+		($self->{env}->{HOME} // '/nonexistent').'/.cache') # else HOME/.cache ('/nonexistent' if HOME is unset)
+		.'/lei'); # NOTE(review): rel2abs is defined elsewhere; presumably absolutizes the path - confirm
+}
+
+sub ale { # returns the PublicInbox::LeiALE object, creating and memoizing it in $self->{ale}
+	my ($self) = @_;
+	$self->{ale} //= do { # only runs while {ale} is still undef
+		require PublicInbox::LeiALE; # loaded on first use
+		PublicInbox::LeiALE->new(cache_dir($self).
+					'/all_locals_ever.git'); # git dir lives under cache_dir
+	};
+}
+
 sub index_opt {
 	# TODO: drop underscore variants everywhere, they're undocumented
 	qw(fsync|sync! jobs|j=i indexlevel|L=s compact
diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm
new file mode 100644
index 00000000..bdb50a1a
--- /dev/null
+++ b/lib/PublicInbox/LeiALE.pm
@@ -0,0 +1,98 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# All Locals Ever: track lei/store + externals ever used as
+# long as they're on an accessible FS.  Includes "lei q" --include
+# and --only targets that haven't been through "lei add-external".
+# Typically: ~/.cache/lei/all_locals_ever.git
+package PublicInbox::LeiALE;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::LeiSearch PublicInbox::Lock);
+use PublicInbox::Git;
+use PublicInbox::Import;
+use Fcntl qw(SEEK_SET);
+
+sub new { # $d: directory used as the git dir for this object
+	my ($cls, $d) = @_;
+	PublicInbox::Import::init_bare($d, 'ale'); # NOTE(review): presumably initializes a bare repo at $d - confirm in Import.pm
+	bless {
+		git => PublicInbox::Git->new($d),
+		lock_path => "$d/lei_ale.state", # dual-duty lock + state
+		ibxish => [], # Inbox and ExtSearch (and LeiSearch) objects
+	}, $cls;
+}
+
+sub over {} # undef for xoids_for
+
+sub overs_all { # for xoids_for (called only in lei workers?)
+	my ($self) = @_;
+	my $pid = $$;
+	if (($self->{owner_pid} // $pid) != $pid) { # recorded PID differs from the current process
+		delete($_->{over}) for @{$self->{ibxish}}; # drop each {over} field; presumably ->over re-creates it - confirm
+	}
+	$self->{owner_pid} = $pid; # remember which process last called this
+	grep(defined, map { $_->over } @{$self->{ibxish}}); # only the defined ->over results are returned
+}
+
+sub refresh_externals { # merge $lxs->locals into the state file and rewrite objects/info/alternates
+	my ($self, $lxs) = @_;
+	$self->git->cleanup;
+	my $lk = $self->lock_for_scope; # NOTE(review): presumably holds the lock until $lk goes out of scope - confirm in Lock.pm
+	my $cur_lxs = ref($lxs)->new; # fresh object of the same class as $lxs, fed the recorded dirs below
+	my $orig = do {
+		local $/; # slurp the whole state file
+		readline($self->{lockfh}) //
+				die "readline($self->{lock_path}): $!";
+	};
+	my $new = ''; # lines to add to the state file
+	my $old = ''; # recorded lines that pass the checks in the loop below
+	my $gone = 0; # number of recorded lines that fail those checks
+	my %seen_ibxish; # $dir => any-defined value
+	for my $dir (split(/\n/, $orig)) { # state file holds one directory per line
+		if (-d $dir && -r _ && $cur_lxs->prepare_external($dir)) {
+			$seen_ibxish{$dir} //= length($old .= "$dir\n"); # first sighting only; duplicates are not re-added to $old
+		} else {
+			++$gone;
+		}
+	}
+	my @ibxish = $cur_lxs->locals; # start from the recorded entries that were kept
+	for my $x ($lxs->locals) {
+		my $d = File::Spec->canonpath($x->{inboxdir} // $x->{topdir});
+		$seen_ibxish{$d} //= do { # only for directories not seen above
+			$new .= "$d\n";
+			push @ibxish, $x;
+		};
+	}
+	if ($new ne '' || $gone) { # state file needs updating
+		$self->{lockfh}->autoflush(1);
+		if ($gone) { # rewrite from the start so dropped entries disappear
+			seek($self->{lockfh}, 0, SEEK_SET) or die "seek: $!";
+			truncate($self->{lockfh}, 0) or die "truncate: $!";
+		} else { # nothing dropped: no seek, so only $new is printed at the handle's current position
+			$old = '';
+		}
+		print { $self->{lockfh} } $old, $new or die "print: $!";
+	}
+	$new = $old = ''; # both are reused below for the alternates file contents
+	my $f = $self->git->{git_dir}.'/objects/info/alternates';
+	if (open my $fh, '<', $f) { # a failed open is not an error here; $old just stays empty
+		local $/;
+		$old = <$fh> // die "readline($f): $!";
+	}
+	for my $x (@ibxish) { # one "<git_dir>/objects" line per entry
+		$new .= File::Spec->canonpath($x->git->{git_dir})."/objects\n";
+	}
+	$self->{ibxish} = \@ibxish;
+	return if $old eq $new; # alternates file already matches
+
+	# this needs to be atomic since child processes may start
+	# git-cat-file at any time
+	my $tmp = "$f.$$.tmp"; # temp file beside $f, named with the current PID
+	open my $fh, '>', $tmp or die "open($tmp): $!";
+	print $fh $new or die "print($tmp): $!";
+	close $fh or die "close($tmp): $!";
+	rename($tmp, $f) or die "rename($tmp, $f): $!"; # replace $f in a single rename
+}
+
+1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index b5dd85e1..aa09be9e 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -139,6 +139,12 @@ sub add_external_finish {
 	my $key = "external.$location.boost";
 	my $cur_boost = $cfg->{$key};
 	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
+	if (-d $location) {
+		require PublicInbox::LeiXSearch;
+		my $lxs = PublicInbox::LeiXSearch->new;
+		$lxs->prepare_external($location);
+		$self->ale->refresh_externals($lxs);
+	}
 	$self->lei_config($key, $new_boost);
 }
 
diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm
index f6348162..1036f465 100644
--- a/lib/PublicInbox/LeiOverview.pm
+++ b/lib/PublicInbox/LeiOverview.pm
@@ -209,11 +209,10 @@ sub ovv_each_smsg_cb { # runs in wq worker usually
 			$wcb->(undef, $smsg, $eml);
 		};
 	} elsif ($l2m && $l2m->{-wq_s1}) {
-		my $git_dir = $ibxish->git->{git_dir};
 		sub {
 			my ($smsg, $mitem) = @_;
 			$smsg->{pct} = get_pct($mitem) if $mitem;
-			$l2m->wq_io_do('write_mail', [], $git_dir, $smsg);
+			$l2m->wq_io_do('write_mail', [], $smsg);
 		}
 	} elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) {
 		my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},";
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index 532668ae..007e35fc 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -57,6 +57,10 @@ sub lei_q {
 	}
 	if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
 		$lxs->prepare_external($lse);
+	} else {
+		my $tmp = PublicInbox::LeiXSearch->new;
+		$tmp->prepare_external($lse);
+		$self->ale->refresh_externals($tmp);
 	}
 	if (@only) {
 		for my $loc (@only) {
@@ -90,6 +94,7 @@ sub lei_q {
 	unless ($lxs->locals || $lxs->remotes) {
 		return $self->fail('no local or remote inboxes to search');
 	}
+	$self->ale->refresh_externals($lxs);
 	my ($xj, $mj) = split(/,/, $opt->{jobs} // '');
 	if (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) {
 		return $self->fail("`$xj' search jobs must be >= 1");
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 26f975c3..c1abc288 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -251,10 +251,11 @@ sub refresh_local_externals {
 		for my $loc (@loc) { # locals only
 			$lxs->prepare_external($loc) if -d $loc;
 		}
+		$self->{lei}->ale->refresh_externals($lxs);
+		$lxs->{git} = $self->{lei}->ale->git;
 		$self->{lxs_all_local} = $lxs;
 		$self->{cur_cfg} = $cfg;
 	}
-	($lxs->{git_tmp} //= $lxs->git_tmp)->{git_dir};
 }
 
 sub write_prepare {
@@ -268,7 +269,7 @@ sub write_prepare {
 		$self->ipc_worker_spawn('lei_store', $lei->oldset,
 					{ lei => $lei });
 	}
-	$lei->{all_ext_git_dir} = $self->ipc_do('refresh_local_externals');
+	my $wait = $self->ipc_do('refresh_local_externals');
 	$lei->{sto} = $self;
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 6f386b10..7e821646 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -11,7 +11,6 @@ use PublicInbox::Lock;
 use PublicInbox::ProcessPipe;
 use PublicInbox::Spawn qw(which spawn popen_rd);
 use PublicInbox::LeiDedupe;
-use PublicInbox::Git;
 use PublicInbox::GitAsyncCat;
 use PublicInbox::PktOp qw(pkt_do);
 use Symbol qw(gensym);
@@ -642,18 +641,15 @@ sub poke_dst {
 }
 
 sub write_mail { # via ->wq_io_do
-	my ($self, $git_dir, $smsg) = @_;
-	my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);
-	git_async_cat($git, $smsg->{blob}, \&git_to_mail,
+	my ($self, $smsg) = @_;
+	git_async_cat($self->{lei}->{ale}->git, $smsg->{blob}, \&git_to_mail,
 				[$self->{wcb}, $smsg]);
 }
 
 sub wq_atexit_child {
 	my ($self) = @_;
 	delete $self->{wcb};
-	for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) {
-		$git->async_wait_all;
-	}
+	$self->{lei}->{ale}->git->async_wait_all;
 	$SIG{__WARN__} = 'DEFAULT';
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index d95a218e..1266b3b3 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -297,27 +297,7 @@ sub query_remote_mboxrd {
 	$lei->{ovv}->ovv_atexit_child($lei);
 }
 
-# called by LeiOverview::each_smsg_cb
-sub git { $_[0]->{git_tmp} // die 'BUG: caller did not set {git_tmp}' }
-
-sub git_tmp ($) {
-	my ($self) = @_;
-	my (%seen, @dirs);
-	my $tmp = File::Temp->newdir("lei_xsearch_git.$$-XXXX", TMPDIR => 1);
-	for my $ibxish (locals($self)) {
-		my $d = File::Spec->canonpath($ibxish->git->{git_dir});
-		$seen{$d} //= push @dirs, "$d/objects\n"
-	}
-	my $git_dir = $tmp->dirname;
-	PublicInbox::Import::init_bare($git_dir);
-	my $f = "$git_dir/objects/info/alternates";
-	open my $alt, '>', $f or die "open($f): $!";
-	print $alt @dirs or die "print $f: $!";
-	close $alt or die "close $f: $!";
-	my $git = PublicInbox::Git->new($git_dir);
-	$git->{-tmp} = $tmp;
-	$git;
-}
+sub git { $_[0]->{git} // die 'BUG: git uninitialized' } # returns the {git} field; dies if it was never set
 
 sub xsearch_done_wait { # dwaitpid callback
 	my ($arg, $pid) = @_;
@@ -460,11 +440,6 @@ sub do_query {
 		# 1031: F_SETPIPE_SZ
 		fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux';
 	}
-	if (!$lei->{opt}->{threads} && locals($self)) { # for query_mset
-		# lei->{git_tmp} is set for wq_wait_old so we don't
-		# delete until all lei2mail + lei_xsearch workers are reaped
-		$lei->{git_tmp} = $self->{git_tmp} = git_tmp($self);
-	}
 	$self->wq_workers_start('lei_xsearch', undef,
 				$lei->oldset, { lei => $lei });
 	my $op = delete $lei->{pkt_op_c};
diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm
index 76c3ffb2..0ee2a8bd 100644
--- a/lib/PublicInbox/Lock.pm
+++ b/lib/PublicInbox/Lock.pm
@@ -16,7 +16,7 @@ sub lock_acquire {
 	my $lock_path = $self->{lock_path};
 	croak 'already locked '.($lock_path // '(undef)') if $self->{lockfh};
 	return unless defined($lock_path);
-	sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or
+	sysopen(my $lockfh, $lock_path, O_RDWR|O_CREAT) or
 		croak "failed to open $lock_path: $!\n";
 	flock($lockfh, LOCK_EX) or croak "lock $lock_path failed: $!\n";
 	$self->{lockfh} = $lockfh;
diff --git a/t/lei-externals.t b/t/lei-externals.t
index 1d2a9a16..2045691f 100644
--- a/t/lei-externals.t
+++ b/t/lei-externals.t
@@ -236,7 +236,8 @@ test_lei(sub {
 		is(scalar(@s), 2, "2 results in mbox$sfx");
 
 		lei_ok('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent');
-		is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)");
+		is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)")
+			or diag $lei_err;
 
 		my @s2 = grep(/^Subject:/, $cat->());
 		is_deeply(\@s2, \@s,
diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t
index f626c790..68211d18 100644
--- a/t/lei_xsearch.t
+++ b/t/lei_xsearch.t
@@ -10,6 +10,7 @@ require_mods(qw(DBD::SQLite Search::Xapian));
 require PublicInbox::ExtSearchIdx;
 require_git 2.6;
 require_ok 'PublicInbox::LeiXSearch';
+require_ok 'PublicInbox::LeiALE';
 my ($home, $for_destroy) = tmpdir();
 my @ibx;
 for my $V (1..2) {
@@ -75,7 +76,8 @@ is($lxs->over, undef, '->over fails');
 	my $v2ibx = create_inbox 'v2full', version => 2, sub {
 		$_[0]->add(eml_load('t/plack-qp.eml'));
 	};
-	my $v1ibx = create_inbox 'v1medium', indexlevel => 'medium', sub {
+	my $v1ibx = create_inbox 'v1medium', indexlevel => 'medium',
+				tmpdir => "$home/v1tmp", sub {
 		$_[0]->add(eml_load('t/utf8.eml'));
 	};
 	$lxs->prepare_external($v1ibx);
@@ -85,6 +87,24 @@ is($lxs->over, undef, '->over fails');
 	}
 	my $mset = $lxs->mset('m:testmessage@example.com');
 	is($mset->size, 1, 'got m: match on medium+full XSearch mix');
+	my $mitem = ($mset->items)[0];
+	my $smsg = $lxs->smsg_for($mitem) or BAIL_OUT 'smsg_for broken';
+
+	my $ale = PublicInbox::LeiALE->new("$home/ale");
+	$ale->refresh_externals($lxs);
+	my $exp = [ $smsg->{blob}, 'blob', -s 't/utf8.eml' ];
+	is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, 'ale->git->check');
+
+	$lxs = PublicInbox::LeiXSearch->new;
+	$lxs->prepare_external($v2ibx);
+	$ale->refresh_externals($lxs);
+	is_deeply([ $ale->git->check($smsg->{blob}) ], $exp,
+			'ale->git->check remembered inactive external');
+
+	rename("$home/v1tmp", "$home/v1moved") or BAIL_OUT "rename: $!";
+	$ale->refresh_externals($lxs);
+	is($ale->git->check($smsg->{blob}), undef,
+			'missing after directory gone');
 }
 
 done_testing;

  reply	other threads:[~2021-03-20 10:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-20 10:04 [PATCH 0/5] lei: preserve keywords across queries Eric Wong
2021-03-20 10:04 ` Eric Wong [this message]
2021-03-20 10:04 ` [PATCH 2/5] lei q: support vmd for external-only messages Eric Wong
2021-03-20 10:04 ` [PATCH 3/5] lei q: put keywords on one line in --pretty output Eric Wong
2021-03-20 10:04 ` [PATCH 4/5] lei_to_mail: match mutt order of status headers Eric Wong
2021-03-20 10:04 ` [PATCH 5/5] lei: tie ALE lifetime to config file Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210320100407.15713-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).