* [PATCH 1/2] lei_xsearch: simplify lei/store import check
2022-07-07 9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
@ 2022-07-07 9:40 ` Eric Wong
2022-07-07 9:40 ` [PATCH 2/2] lei: track seen messages to note duplicates Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2022-07-07 9:40 UTC (permalink / raw)
To: meta
There's no need to check for two fields when one will suffice.
---
lib/PublicInbox/LeiXSearch.pm | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2958d3f9..41e79856 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -285,7 +285,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
if (ref($res) eq ref($smsg)) { # totally new message
$smsg = $res;
- $self->{-imported} = 1;
+ $self->{-sto_imported} = 1;
}
$smsg->{kw} = $kw; # short-circuit xsmsg_vmd
}
@@ -376,7 +376,7 @@ sub query_remote_mboxrd {
$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
$lei, $each_smsg);
- if ($self->{import_sto} && delete($self->{-imported})) {
+ if (delete($self->{-sto_imported})) {
my $wait = $self->{import_sto}->wq_do('done');
}
$reap_curl->join;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] lei: track seen messages to note duplicates
2022-07-07 9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
2022-07-07 9:40 ` [PATCH 1/2] lei_xsearch: simplify lei/store import check Eric Wong
@ 2022-07-07 9:40 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2022-07-07 9:40 UTC (permalink / raw)
To: meta
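This may help track down deduplication or other bugs in lei
which occasionally lead to missing messages.  Every message
reaching a write callback is now counted in -nr_seen before the
dedupe check, and the difference from -nr_write is reported as
the number of duplicates.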
Link: https://public-inbox.org/meta/CAL_JsqJH8xx_2NyZffNsRXbGXiv3kjmCETvKXt3Yfb0uToLm9Q@mail.gmail.com/
---
lib/PublicInbox/LeiConvert.pm | 8 +++++---
lib/PublicInbox/LeiToMail.pm | 13 ++++++++++---
lib/PublicInbox/LeiXSearch.pm | 20 ++++++++++++--------
3 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 906f3026..59af40de 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# front-end for the "lei convert" sub-command
@@ -35,8 +35,10 @@ sub process_inputs { # via wq_do
my $lei = $self->{lei};
delete $lei->{1};
delete $self->{wcb}; # commit
- my $nr = delete($lei->{-nr_write}) // 0;
- $lei->qerr("# converted $nr messages");
+ my $nr_w = delete($lei->{-nr_write}) // 0;
+ my $d = (delete($lei->{-nr_seen}) // 0) - $nr_w;
+ $d = $d ? " ($d duplicates)" : '';
+ $lei->qerr("# converted $nr_w messages$d");
}
sub lei_convert { # the main "lei convert" method
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 3c5e7e59..2aa3977e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir
@@ -197,6 +197,7 @@ sub _mbox_write_cb ($$) {
sub { # for git_to_mail
my ($buf, $smsg, $eml) = @_;
$eml //= PublicInbox::Eml->new($buf);
+ ++$lei->{-nr_seen};
return if $dedupe->is_dup($eml, $smsg);
$lse->xsmsg_vmd($smsg) if $lse;
$smsg->{-recent} = 1 if $set_recent;
@@ -291,6 +292,8 @@ sub _maildir_write_cb ($$) {
sub { # for git_to_mail
my ($bref, $smsg, $eml) = @_;
$dst // return $lei->fail; # dst may be undef-ed in last run
+
+ ++$lei->{-nr_seen};
return if $dedupe && $dedupe->is_dup($eml //
PublicInbox::Eml->new($$bref),
$smsg);
@@ -317,6 +320,8 @@ sub _imap_write_cb ($$) {
sub { # for git_to_mail
my ($bref, $smsg, $eml) = @_;
$mic // return $lei->fail; # mic may be undef-ed in last run
+
+ ++$lei->{-nr_seen};
return if $dedupe && $dedupe->is_dup($eml //
PublicInbox::Eml->new($$bref),
$smsg);
@@ -360,6 +365,7 @@ sub _v2_write_cb ($$) {
sub { # for git_to_mail
my ($bref, $smsg, $eml) = @_;
$eml //= PublicInbox::Eml->new($bref);
+ ++$lei->{-nr_seen};
return if $dedupe && $dedupe->is_dup($eml, $smsg);
$lei->{v2w}->wq_do('add', $eml); # V2Writable->add
++$lei->{-nr_write};
@@ -792,9 +798,10 @@ sub wq_atexit_child {
my $lei = $self->{lei};
delete $self->{wcb};
$lei->{ale}->git->async_wait_all;
- my $nr = delete($lei->{-nr_write}) or return;
+ my ($nr_w, $nr_s) = delete(@$lei{qw(-nr_write -nr_seen)});
+ $nr_s or return;
return if $lei->{early_mua} || !$lei->{-progress} || !$lei->{pkt_op_p};
- $lei->{pkt_op_p}->pkt_do('l2m_progress', $nr);
+ $lei->{pkt_op_p}->pkt_do('l2m_progress', $nr_w, $nr_s);
}
# runs on a 1s timer in lei-daemon
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 41e79856..6f877019 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Combine any combination of PublicInbox::Search,
@@ -163,8 +163,9 @@ sub mset_progress {
}
sub l2m_progress {
- my ($lei, $nr) = @_;
- $lei->{-nr_write} += $nr;
+ my ($lei, $nr_write, $nr_seen) = @_;
+ $lei->{-nr_write} += $nr_write;
+ $lei->{-nr_seen} += $nr_seen;
}
sub query_one_mset { # for --threads and l2m w/o sort
@@ -447,13 +448,16 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$?
}
if ($lei->{-progress}) {
my $tot = $lei->{-mset_total} // 0;
- my $nr = $lei->{-nr_write} // 0;
+ my $nr_w = $lei->{-nr_write} // 0;
+ my $d = ($lei->{-nr_seen} // 0) - $nr_w;
+ my $x = "$tot matches";
+ $x .= ", $d duplicates" if $d;
if ($l2m) {
- my $m = "# $nr written to " .
- "$lei->{ovv}->{dst} ($tot matches)";
- $nr ? $lei->qfin($m) : $lei->qerr($m);
+ my $m = "# $nr_w written to " .
+ "$lei->{ovv}->{dst} ($x)";
+ $nr_w ? $lei->qfin($m) : $lei->qerr($m);
} else {
- $lei->qerr("# $tot matches");
+ $lei->qerr("# $x");
}
}
$lei->start_mua if $l2m && !$l2m->lock_free;
^ permalink raw reply related [flat|nested] 3+ messages in thread