* [PATCH 1/5] git: rename `_active' sub to `cat_active'
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
@ 2024-08-14 0:16 ` Eric Wong
2024-08-14 0:16 ` [PATCH 2/5] lei_store: use autodie for open+close Eric Wong
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-08-14 0:16 UTC (permalink / raw)
To: meta
Having an underscore prefix is confusing for a public
subroutine, and the `cat_' prefix makes it obvious it isn't
some other command.
---
lib/PublicInbox/Git.pm | 6 +++---
lib/PublicInbox/LeiStore.pm | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 32c11a59..dd0a14e9 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -457,7 +457,7 @@ sub date_parse {
} $self->qx('rev-parse', map { "--since=$_" } @_);
}
-sub _active ($) {
+sub cat_active ($) {
scalar(@{gcf_inflight($_[0]) // []}) ||
($_[0]->{ck} && scalar(@{gcf_inflight($_[0]->{ck}) // []}))
}
@@ -466,7 +466,7 @@ sub _active ($) {
# both completely done by using this:
sub async_wait_all ($) {
my ($self) = @_;
- while (_active($self)) {
+ while (cat_active($self)) {
check_async_wait($self);
cat_async_wait($self);
}
@@ -475,7 +475,7 @@ sub async_wait_all ($) {
# returns true if there are pending "git cat-file" processes
sub cleanup {
my ($self, $lazy) = @_;
- ($lazy && _active($self)) and
+ ($lazy && cat_active($self)) and
return $self->{epwatch} ? watch_async($self) : 1;
local $in_cleanup = 1;
async_wait_all($self);
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index b2da2bc3..f9e8267b 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -573,7 +573,7 @@ sub set_xvmd {
sub check_done {
my ($self) = @_;
- $self->git->_active ?
+ $self->git->cat_active ?
add_uniq_timer("$self-check_done", 5, \&check_done, $self) :
done($self);
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/5] lei_store: use autodie for open+close
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
2024-08-14 0:16 ` [PATCH 1/5] git: rename `_active' sub to `cat_active' Eric Wong
@ 2024-08-14 0:16 ` Eric Wong
2024-08-14 0:16 ` [PATCH 3/5] v2writable: confess on broken {idx_shards} Eric Wong
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-08-14 0:16 UTC (permalink / raw)
To: meta
No reason to do error checking ourselves when the standard library
can do it.
---
lib/PublicInbox/LeiStore.pm | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index f9e8267b..9551da5f 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -15,6 +15,7 @@ package PublicInbox::LeiStore;
use strict;
use v5.10.1;
use parent qw(PublicInbox::Lock PublicInbox::IPC);
+use autodie qw(open pipe);
use PublicInbox::ExtSearchIdx;
use PublicInbox::Eml;
use PublicInbox::Import;
@@ -57,7 +58,7 @@ sub rotate_bytes {
sub git_ident ($) {
my ($git) = @_;
my $rdr = {};
- open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+ open $rdr->{2}, '>', '/dev/null';
chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr));
$i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ and return ($1, $2);
my ($user, undef, undef, undef, undef, undef, $gecos) = getpwuid($<);
@@ -585,8 +586,8 @@ sub xchg_stderr {
return unless -e $dir;
delete $self->{-tmp_err};
my ($err, $name) = tmpnam();
- open STDERR, '>>', $name or die "dup2: $!";
- unlink($name);
+ open STDERR, '>>', $name;
+ unlink $name; # ignore errors
STDERR->autoflush(1); # shared with shard subprocesses
$self->{-tmp_err} = $err; # separate file description for RO access
undef;
@@ -648,7 +649,7 @@ sub write_prepare {
unless ($self->{-wq_s1}) {
my $dir = $lei->store_path;
substr($dir, -length('/lei/store'), 10, '');
- pipe(my ($r, $w)) or die "pipe: $!";
+ pipe(my $r, my $w);
$w->autoflush(1);
# Mail we import into lei are private, so headers filtered out
# by -mda for public mail are not appropriate
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/5] v2writable: confess on broken {idx_shards}
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
2024-08-14 0:16 ` [PATCH 1/5] git: rename `_active' sub to `cat_active' Eric Wong
2024-08-14 0:16 ` [PATCH 2/5] lei_store: use autodie for open+close Eric Wong
@ 2024-08-14 0:16 ` Eric Wong
2024-08-14 0:16 ` [PATCH 4/5] lei_search: make missing Xapian docs for kw lookups Eric Wong
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-08-14 0:16 UTC (permalink / raw)
To: meta
There's a bug in `lei import' introduced in 4ff8e8d21ab5
(lei/store: stop shard workers + cat-file on idle, 2024-04-16)
which causes {idx_shards} to not be recreated properly.
Hopefully this can help me track it down since it's not easily
reproducible.
---
lib/PublicInbox/V2Writable.pm | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 15a73158..dab5e64d 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -90,6 +90,9 @@ sub init_inbox {
sub idx_shard ($$) {
my ($self, $num) = @_;
+ use Carp qw(confess); # FIXME: lei_store bug somewhere..
+ confess 'BUG: {idx_shards} unset' if !$self->{idx_shards};
+ confess 'BUG: {idx_shards} empty' if !@{$self->{idx_shards}};
$self->{idx_shards}->[$num % scalar(@{$self->{idx_shards}})];
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/5] lei_search: make missing Xapian docs for kw lookups
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
` (2 preceding siblings ...)
2024-08-14 0:16 ` [PATCH 3/5] v2writable: confess on broken {idx_shards} Eric Wong
@ 2024-08-14 0:16 ` Eric Wong
2024-08-14 0:16 ` [PATCH 5/5] net_reader: improve IMAP error reporting Eric Wong
2024-11-19 21:51 ` [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-08-14 0:16 UTC (permalink / raw)
To: meta
Missing keyword entries should be non-fatal since Xapian
data is always less important than what's in git and SQLite.
As such, Xapian data has and remains written last, leaving
the possibility of documents being missing from Xapian but
present in SQLite and git.
This improves recovery dealing with badly interrupted or failed
imports due to bugs or hardware failures.
---
lib/PublicInbox/LeiSearch.pm | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm
index 684668c5..4519306d 100644
--- a/lib/PublicInbox/LeiSearch.pm
+++ b/lib/PublicInbox/LeiSearch.pm
@@ -145,7 +145,15 @@ sub kw_changed {
}
if (!defined($cur_kw) && $@) {
$docids = join(', num:', @$docids);
- croak "E: num:$docids keyword lookup failure: $@";
+ # this may happen if a previous import was incomplete since
+ # we commit changes to Xapian last
+ if (ref($@) =~ /::DocNotFoundError\b/) {
+ warn <<EOM;
+W: num:$docids keyword lookup failure, assuming no keywords
+EOM
+ } else {
+ croak "E: num:$docids keyword lookup failure: $@";
+ }
}
# RFC 5550 sec 5.9 on the $Forwarded keyword states:
# "Once set, the flag SHOULD NOT be cleared"
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 5/5] net_reader: improve IMAP error reporting
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
` (3 preceding siblings ...)
2024-08-14 0:16 ` [PATCH 4/5] lei_search: make missing Xapian docs for kw lookups Eric Wong
@ 2024-08-14 0:16 ` Eric Wong
2024-11-19 21:51 ` [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-08-14 0:16 UTC (permalink / raw)
To: meta
IMAPClient->LastError provides information that isn't an errno
(`$!'), so use it to tell us about anything the server reports
back.
---
lib/PublicInbox/NetReader.pm | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index ec18818b..7e861f5f 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -570,7 +570,8 @@ sub each_old_flags ($$$$) {
my $r = $mic->fetch_hash("$n:$end", 'FLAGS');
if (!$r) {
return if $!{EINTR} && $self->{quit};
- return "E: $uri UID FETCH $n:$end error: $!";
+ return "E: $uri UID FETCH $n:$end error: " .
+ $mic->LastError." \$!=$!"
}
while (my ($uid, $per_uid) = each %$r) {
my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS})
@@ -699,7 +700,8 @@ EOF
$uids = [ $single_uid ];
} elsif (!($uids = $mic->search("UID $l_uid:*"))) {
return if $!{EINTR} && $self->{quit};
- return "E: $uri UID SEARCH $l_uid:* error: $!";
+ return "E: $uri UID SEARCH $l_uid:* error: ".
+ $mic->LastError." \$!=$!"
}
return if scalar(@$uids) == 0;
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/5] lei import-related fixes + diagnostics
2024-08-14 0:16 [PATCH 0/5] lei import-related fixes + diagnostics Eric Wong
` (4 preceding siblings ...)
2024-08-14 0:16 ` [PATCH 5/5] net_reader: improve IMAP error reporting Eric Wong
@ 2024-11-19 21:51 ` Eric Wong
5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2024-11-19 21:51 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> I've noticed `lei import' on IMAP inboxes occasionally failing
> for me due to {idx_shards} not getting recreated after
> 4ff8e8d21ab5 (lei/store: stop shard workers + cat-file on idle,
> 2024-04-16).
The combination of commit 807abf67e14d35270ed0957590cde0ed1eb68635
(lei/store: auto-commit for long-running imports, 2024-11-15)
and https://public-inbox.org/meta/20241119214752.1883670-4-e@80x24.org/
likely fixes the `lei import' failures. Still doing some long-running
tests, but initial tests seem successful...
^ permalink raw reply [flat|nested] 7+ messages in thread