From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 9/9] lei import|tag|rm: support --commit-delay=SECONDS
Date: Wed, 11 Oct 2023 07:20:57 +0000 [thread overview]
Message-ID: <20231011072057.758022-10-e@80x24.org> (raw)
In-Reply-To: <20231011072057.758022-1-e@80x24.org>
Delayed commits allow users to trade off immediate safety for
throughput and reduced storage wear when running multiple
discrete commands.
This feature is currently useful for providing a way to make
t/lei-store-fail.t reliable and for ensuring `lei blob' can
retrieve messages which have not yet been committed.
In the future, it'll also be useful for the FUSE layer to batch
git activity.
---
lib/PublicInbox/LEI.pm | 23 ++++++++++++++---------
lib/PublicInbox/LeiStore.pm | 6 ++++++
t/lei-import.t | 13 +++++++++++++
t/lei-store-fail.t | 20 +++++++++++++-------
t/lei-tag.t | 15 ++++++++++++++-
5 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e2b3c0d9..af39f8af 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -231,13 +231,13 @@ our %CMD = ( # sorted in order of importance/use:
'rm' => [ '--stdin|LOCATION...',
'remove a message from the index and prevent reindexing',
'stdin|', # /|\z/ must be first for lone dash
- qw(in-format|F=s lock=s@), @net_opt, @c_opt ],
+ qw(in-format|F=s lock=s@ commit-delay=i), @net_opt, @c_opt ],
'plonk' => [ '--threads|--from=IDENT',
'exclude mail matching From: or threads from non-Message-ID searches',
qw(stdin| threads|t from|f=s mid=s oid=s), @c_opt ],
-'tag' => [ 'KEYWORDS... LOCATION...|--stdin',
+tag => [ 'KEYWORDS... LOCATION...|--stdin',
'set/unset keywords and/or labels on message(s)',
- qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@),
+ qw(stdin| in-format|F=s input|i=s@ oid=s@ mid=s@ commit-delay=i),
@net_opt, @c_opt, pass_through('-kw:foo for delete') ],
'purge-mailsource' => [ 'LOCATION|--all',
@@ -262,10 +262,11 @@ our %CMD = ( # sorted in order of importance/use:
qw(in-format|F=s kw! offset=i recursive|r exclude=s include|I=s
verbose|v+ incremental!), @net_opt, # mainly for --proxy=
@c_opt ],
-'import' => [ 'LOCATION...|--stdin [LABELS...]',
+import => [ 'LOCATION...|--stdin [LABELS...]',
'one-time import/update from URL or filesystem',
qw(stdin| offset=i recursive|r exclude=s include|I=s new-only
- lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!),
+ lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!
+ commit-delay=i),
@net_opt, @c_opt ],
'forget-mail-sync' => [ 'LOCATION...',
'forget sync information for a mail folder', @c_opt ],
@@ -1539,10 +1540,14 @@ sub sto_done_request {
my ($lei, $wq) = @_;
return unless $lei->{sto} && $lei->{sto}->{-wq_s1};
local $current_lei = $lei;
- my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
- my $errfh = $lei->{2} // *STDERR{GLOB};
- my @io = $s ? ($errfh, $s) : ($errfh);
- eval { $lei->{sto}->wq_io_do('done', \@io) };
+ if (my $n = $lei->{opt}->{'commit-delay'}) {
+ eval { $lei->{sto}->wq_do('schedule_commit', $n) };
+ } else {
+ my $s = ($wq ? $wq->{lei_sock} : undef) // $lei->{sock};
+ my $errfh = $lei->{2} // *STDERR{GLOB};
+ my @io = $s ? ($errfh, $s) : ($errfh);
+ eval { $lei->{sto}->wq_io_do('done', \@io) };
+ }
warn($@) if $@;
}
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 9c07af14..aebb85a9 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -34,6 +34,7 @@ use Sys::Syslog qw(syslog openlog);
use Errno qw(EEXIST ENOENT);
use PublicInbox::Syscall qw(rename_noreplace);
use PublicInbox::LeiStoreErr;
+use PublicInbox::DS qw(add_uniq_timer);
sub new {
my (undef, $dir, $opt) = @_;
@@ -113,6 +114,11 @@ sub cat_blob {
$self->{im} ? $self->{im}->cat_blob($oid) : undef;
}
+sub schedule_commit {
+ my ($self, $sec) = @_;
+ add_uniq_timer($self->{priv_eidx}->{topdir}, $sec, \&done, $self);
+}
+
# follows the stderr file
sub _tail_err {
my ($self) = @_;
diff --git a/t/lei-import.t b/t/lei-import.t
index 8b09d3aa..b2c1de9b 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -2,6 +2,7 @@
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
use v5.12; use PublicInbox::TestCommon;
+use PublicInbox::DS qw(now);
use autodie qw(open close);
test_lei(sub {
ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
@@ -141,6 +142,18 @@ $res = json_utf8->decode($lei_out);
is_deeply($res->[0]->{kw}, [qw(answered flagged seen)], 'keyword added');
is_deeply($res->[0]->{L}, [qw(boombox inbox)], 'labels preserved');
+lei_ok qw(import --commit-delay=1 +L:bin -F eml t/data/binary.patch);
+lei_ok 'ls-label';
+unlike($lei_out, qr/\bbin\b/, 'commit-delay delays label');
+my $end = now + 10;
+my $n = 1;
+diag 'waiting for lei/store commit...';
+do {
+ tick $n;
+ $n = 0.1;
+} until (!lei('ls-label') || $lei_out =~ /\bbin\b/ || now > $end);
+like($lei_out, qr/\bbin\b/, 'commit-delay eventually commits');
+
# see t/lei_to_mail.t for "import -F mbox*"
});
done_testing;
diff --git a/t/lei-store-fail.t b/t/lei-store-fail.t
index fb0f2b75..c2f03148 100644
--- a/t/lei-store-fail.t
+++ b/t/lei-store-fail.t
@@ -9,8 +9,11 @@ use Fcntl qw(SEEK_SET);
use File::Path qw(remove_tree);
my $start_home = $ENV{HOME}; # bug guard
+my $utf8_oid = '9bf1002c49eb075df47247b74d69bcd555e23422';
test_lei(sub {
lei_ok qw(import -q t/plack-qp.eml); # start the store
+ ok(!lei(qw(blob --mail), $utf8_oid), 't/utf8.eml not imported, yet');
+
my $opt;
pipe($opt->{0}, my $in_w);
open $opt->{1}, '+>', undef;
@@ -20,27 +23,30 @@ test_lei(sub {
my $tp = start_script($cmd, undef, $opt);
close $opt->{0};
$in_w->autoflush(1);
- for (1..500) { # need to fill up 64k read buffer
- print $in_w <<EOM or xbail "print $!";
+ print $in_w <<EOM or xbail "print: $!";
From k\@y Fri Oct 2 00:00:00 1993
From: <k\@example.com>
Date: Sat, 02 Oct 2010 00:00:00 +0000
Subject: hi
-Message-ID: <$_\@t>
+Message-ID: <0\@t>
will this save?
EOM
- }
- tick 0.2; # XXX ugh, this is so hacky
+ # import another message w/ delay while mboxrd import is still running
+ lei_ok qw(import -q --commit-delay=300 t/utf8.eml);
+ lei_ok qw(blob --mail), $utf8_oid,
+ \'blob immediately available despite --commit-delay';
+ lei_ok qw(q m:testmessage@example.com);
+ is($lei_out, "[null]\n", 'delayed commit is unindexed');
- # make sto_done_request fail:
+ # make immediate ->sto_done_request fail from mboxrd import:
remove_tree("$ENV{HOME}/.local/share/lei/store");
# subsequent lei commands are undefined behavior,
# but we need to make sure the current lei command fails:
close $in_w; # should trigger ->done
$tp->join;
- isnt($?, 0, 'lei import error code set on failure');
+ isnt($?, 0, 'lei import -F mboxrd error code set on failure');
is(-s $opt->{1}, 0, 'nothing in stdout');
isnt(-s $opt->{2}, 0, 'stderr not empty');
seek($opt->{2}, 0, SEEK_SET);
diff --git a/t/lei-tag.t b/t/lei-tag.t
index cccf0af6..7278dfcd 100644
--- a/t/lei-tag.t
+++ b/t/lei-tag.t
@@ -1,9 +1,10 @@
#!perl -w
# Copyright (C) 2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-use strict; use v5.10.1; use PublicInbox::TestCommon;
+use v5.12; use PublicInbox::TestCommon;
require_git 2.6;
require_mods(qw(json DBD::SQLite Xapian));
+use PublicInbox::DS qw(now);
my ($ro_home, $cfg_path) = setup_public_inboxes;
my $check_kw = sub {
my ($exp, %opt) = @_;
@@ -104,5 +105,17 @@ test_lei(sub {
lei_ok qw(tag +L:nope -F eml t/data/binary.patch);
like $lei_err, qr/\b1 unimported messages/, 'noted unimported'
or diag $lei_err;
+
+ lei_ok qw(tag -F eml --commit-delay=1 t/utf8.eml +L:utf8);
+ lei_ok 'ls-label';
+ unlike($lei_out, qr/\butf8\b/, 'commit-delay delays label');
+ my $end = now + 10;
+ my $n = 1;
+ diag 'waiting for lei/store commit...';
+ do {
+ tick $n;
+ $n = 0.1;
+ } until (!lei('ls-label') || $lei_out =~ /\butf8\b/ || now > $end);
+ like($lei_out, qr/\butf8\b/, 'commit-delay eventually commits');
});
done_testing;
prev parent reply other threads:[~2023-10-11 7:20 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-11 7:20 [PATCH 0/9] lei + import-related updates Eric Wong
2023-10-11 7:20 ` [PATCH 1/9] lei rediff: use ProcessIO for --drq support Eric Wong
2023-10-11 7:20 ` [PATCH 2/9] lei_xsearch: improve curl progress reporting Eric Wong
2023-10-11 7:20 ` [PATCH 3/9] msgtime: quiet warnings we can do nothing about Eric Wong
2023-10-11 7:20 ` [PATCH 4/9] msgtime: simplify msg_timestamp and msg_datestamp Eric Wong
2023-10-11 7:20 ` [PATCH 5/9] treewide: consolidate "From " line removal Eric Wong
2023-10-11 7:20 ` [PATCH 6/9] import: switch to Unix stream socket for fast-import Eric Wong
2023-10-11 7:20 ` [PATCH 7/9] import: cat_blob is a no-op w/o live fast-import Eric Wong
2023-10-11 7:20 ` [PATCH 8/9] lei blob: run cat_blob on lei/store for pending blobs Eric Wong
2023-10-11 7:20 ` Eric Wong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231011072057.758022-10-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).