unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 9/9] lei import|tag|rm: support --commit-delay=SECONDS
Date: Wed, 11 Oct 2023 07:20:57 +0000	[thread overview]
Message-ID: <20231011072057.758022-10-e@80x24.org> (raw)
In-Reply-To: <20231011072057.758022-1-e@80x24.org>

Delayed commits allow users to trade off immediate safety for
throughput and reduced storage wear when running multiple
discrete commands.

This feature is currently useful for providing a way to make
t/lei-store-fail.t reliable and for ensuring `lei blob' can
retrieve messages which have not yet been committed.

In the future, it'll also be useful for the FUSE layer to batch
git activity.
---
 lib/PublicInbox/LEI.pm      | 23 ++++++++++++++---------
 lib/PublicInbox/LeiStore.pm |  6 ++++++
 t/lei-import.t              | 13 +++++++++++++
 t/lei-store-fail.t          | 20 +++++++++++++-------
 t/lei-tag.t                 | 15 ++++++++++++++-
 5 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e2b3c0d9..af39f8af 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -231,13 +231,13 @@ our %CMD = ( # sorted in order of importance/use:
 'rm' => [ '--stdin|LOCATION...',
 	'remove a message from the index and prevent reindexing',
 	'stdin|', # /|\z/ must be first for lone dash
-	qw(in-format|F=s lock=s@), @net_opt, @c_opt ],
+	qw(in-format|F=s lock=s@ commit-delay=i), @net_opt, @c_opt ],
 'plonk' => [ '--threads|--from=IDENT',
 	'exclude mail matching From: or threads from non-Message-ID searches',
 	qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
-'tag' => [ 'KEYWORDS... LOCATION...|--stdin',
+tag => [ 'KEYWORDS... LOCATION...|--stdin',
 	'set/unset keywords and/or labels on message(s)',
-	qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@),
+	qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@ commit-delay=i),
 	@net_opt, @c_opt, pass_through('-kw:foo for delete') ],
 
 'purge-mailsource' => [ 'LOCATION|--all',
@@ -262,10 +262,11 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
 	verbose|v+ incremental!), @net_opt, # mainly for --proxy=
 	 @c_opt ],
-'import' => [ 'LOCATION...|--stdin [LABELS...]',
+import => [ 'LOCATION...|--stdin [LABELS...]',
 	'one-time import/update from URL or filesystem',
 	qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
-	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!),
+	lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
+	commit-delay=i),
 	@net_opt, @c_opt ],
 'forget-mail-sync' => [ 'LOCATION...',
 	'forget sync information for a mail folder', @c_opt ],
@@ -1539,10 +1540,14 @@ sub sto_done_request {
 	my ($lei, $wq) = @_;
 	return unless $lei->{sto} && $lei->{sto}->{-wq_s1};
 	local $current_lei = $lei;
-	my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
-	my $errfh = $lei->{2} // *STDERR{GLOB};
-	my @io = $s ? ($errfh, $s) : ($errfh);
-	eval { $lei->{sto}->wq_io_do('done', \@io) };
+	if (my $n = $lei->{opt}->{'commit-delay'}) {
+		eval { $lei->{sto}->wq_do('schedule_commit', $n) };
+	} else {
+		my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
+		my $errfh = $lei->{2} // *STDERR{GLOB};
+		my @io = $s ? ($errfh, $s) : ($errfh);
+		eval { $lei->{sto}->wq_io_do('done', \@io) };
+	}
 	warn($@) if $@;
 }
 
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 9c07af14..aebb85a9 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -34,6 +34,7 @@ use Sys::Syslog qw(syslog openlog);
 use Errno qw(EEXIST ENOENT);
 use PublicInbox::Syscall qw(rename_noreplace);
 use PublicInbox::LeiStoreErr;
+use PublicInbox::DS qw(add_uniq_timer);
 
 sub new {
 	my (undef, $dir, $opt) = @_;
@@ -113,6 +114,11 @@ sub cat_blob {
 	$self->{im} ? $self->{im}->cat_blob($oid) : undef;
 }
 
+sub schedule_commit {
+	my ($self, $sec) = @_;
+	add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&done, $self);
+}
+
 # follows the stderr file
 sub _tail_err {
 	my ($self) = @_;
diff --git a/t/lei-import.t b/t/lei-import.t
index 8b09d3aa..b2c1de9b 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -2,6 +2,7 @@
 # Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use v5.12; use PublicInbox::TestCommon;
+use PublicInbox::DS qw(now);
 use autodie qw(open close);
 test_lei(sub {
 ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
@@ -141,6 +142,18 @@ $res = json_utf8->decode($lei_out);
 is_deeply($res->[0]->{kw}, [qw(answered flagged seen)], 'keyword added');
 is_deeply($res->[0]->{L}, [qw(boombox inbox)], 'labels preserved');
 
+lei_ok qw(import --commit-delay=1 +L:bin -F eml t/data/binary.patch);
+lei_ok 'ls-label';
+unlike($lei_out, qr/\bbin\b/, 'commit-delay delays label');
+my $end = now + 10;
+my $n = 1;
+diag 'waiting for lei/store commit...';
+do {
+	tick $n;
+	$n = 0.1;
+} until (!lei('ls-label') || $lei_out =~ /\bbin\b/ || now > $end);
+like($lei_out, qr/\bbin\b/, 'commit-delay eventually commits');
+
 # see t/lei_to_mail.t for "import -F mbox*"
 });
 done_testing;
diff --git a/t/lei-store-fail.t b/t/lei-store-fail.t
index fb0f2b75..c2f03148 100644
--- a/t/lei-store-fail.t
+++ b/t/lei-store-fail.t
@@ -9,8 +9,11 @@ use Fcntl qw(SEEK_SET);
 use File::Path qw(remove_tree);
 
 my $start_home = $ENV{HOME}; # bug guard
+my $utf8_oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
 test_lei(sub {
 	lei_ok qw(import -q t/plack-qp.eml); # start the store
+	ok(!lei(qw(blob --mail), $utf8_oid), 't/utf8.eml not imported, yet');
+
 	my $opt;
 	pipe($opt->{0}, my $in_w);
 	open $opt->{1}, '+>', undef;
@@ -20,27 +23,30 @@ test_lei(sub {
 	my $tp = start_script($cmd, undef, $opt);
 	close $opt->{0};
 	$in_w->autoflush(1);
-	for (1..500) { # need to fill up 64k read buffer
-		print $in_w <<EOM or xbail "print $!";
+	print $in_w <<EOM or xbail "print: $!";
 From k\@y Fri Oct  2 00:00:00 1993
 From: <k\@example.com>
 Date: Sat, 02 Oct 2010 00:00:00 +0000
 Subject: hi
-Message-ID: <$_\@t>
+Message-ID: <0\@t>
 
 will this save?
 EOM
-	}
-	tick 0.2; # XXX ugh, this is so hacky
+	# import another message w/ delay while mboxrd import is still running
+	lei_ok qw(import -q --commit-delay=300 t/utf8.eml);
+	lei_ok qw(blob --mail), $utf8_oid,
+		\'blob immediately available despite --commit-delay';
+	lei_ok qw(q m:testmessage@example.com);
+	is($lei_out, "[null]\n", 'delayed commit is unindexed');
 
-	# make sto_done_request fail:
+	# make immediate ->sto_done_request fail from mboxrd import:
 	remove_tree("$ENV{HOME}/.local/share/lei/store");
 	# subsequent lei commands are undefined behavior,
 	# but we need to make sure the current lei command fails:
 
 	close $in_w; # should trigger ->done
 	$tp->join;
-	isnt($?, 0, 'lei import error code set on failure');
+	isnt($?, 0, 'lei import -F mboxrd error code set on failure');
 	is(-s $opt->{1}, 0, 'nothing in stdout');
 	isnt(-s $opt->{2}, 0, 'stderr not empty');
 	seek($opt->{2}, 0, SEEK_SET);
diff --git a/t/lei-tag.t b/t/lei-tag.t
index cccf0af6..7278dfcd 100644
--- a/t/lei-tag.t
+++ b/t/lei-tag.t
@@ -1,9 +1,10 @@
 #!perl -w
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict; use v5.10.1; use PublicInbox::TestCommon;
+use v5.12; use PublicInbox::TestCommon;
 require_git 2.6;
 require_mods(qw(json DBD::SQLite Xapian));
+use PublicInbox::DS qw(now);
 my ($ro_home, $cfg_path) = setup_public_inboxes;
 my $check_kw = sub {
 	my ($exp, %opt) = @_;
@@ -104,5 +105,17 @@ test_lei(sub {
 	lei_ok qw(tag +L:nope -F eml t/data/binary.patch);
 	like $lei_err, qr/\b1 unimported messages/, 'noted unimported'
 		or diag $lei_err;
+
+	lei_ok qw(tag -F eml --commit-delay=1 t/utf8.eml +L:utf8);
+	lei_ok 'ls-label';
+	unlike($lei_out, qr/\butf8\b/, 'commit-delay delays label');
+	my $end = now + 10;
+	my $n = 1;
+	diag 'waiting for lei/store commit...';
+	do {
+		tick $n;
+		$n = 0.1;
+	} until (!lei('ls-label') || $lei_out =~ /\butf8\b/ || now > $end);
+	like($lei_out, qr/\butf8\b/, 'commit-delay eventually commits');
 });
 done_testing;

      parent reply	other threads:[~2023-10-11  7:20 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-11  7:20 [PATCH 0/9] lei + import-related updates Eric Wong
2023-10-11  7:20 ` [PATCH 1/9] lei rediff: use ProcessIO for --drq support Eric Wong
2023-10-11  7:20 ` [PATCH 2/9] lei_xsearch: improve curl progress reporting Eric Wong
2023-10-11  7:20 ` [PATCH 3/9] msgtime: quiet warnings we can do nothing about Eric Wong
2023-10-11  7:20 ` [PATCH 4/9] msgtime: simplify msg_timestamp and msg_datestamp Eric Wong
2023-10-11  7:20 ` [PATCH 5/9] treewide: consolidate "From " line removal Eric Wong
2023-10-11  7:20 ` [PATCH 6/9] import: switch to Unix stream socket for fast-import Eric Wong
2023-10-11  7:20 ` [PATCH 7/9] import: cat_blob is a no-op w/o live fast-import Eric Wong
2023-10-11  7:20 ` [PATCH 8/9] lei blob: run cat_blob on lei/store for pending blobs Eric Wong
2023-10-11  7:20 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231011072057.758022-10-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).