unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/2] lei: track seen messages to note duplicates
Date: Thu,  7 Jul 2022 09:40:30 +0000	[thread overview]
Message-ID: <20220707094030.1185793-3-e@80x24.org> (raw)
In-Reply-To: <20220707094030.1185793-1-e@80x24.org>

This may help track down deduplication or other bugs in lei
that occasionally lead to missing messages.

Link: https://public-inbox.org/meta/CAL_JsqJH8xx_2NyZffNsRXbGXiv3kjmCETvKXt3Yfb0uToLm9Q@mail.gmail.com/
---
 lib/PublicInbox/LeiConvert.pm |  8 +++++---
 lib/PublicInbox/LeiToMail.pm  | 13 ++++++++++---
 lib/PublicInbox/LeiXSearch.pm | 20 ++++++++++++--------
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 906f3026..59af40de 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # front-end for the "lei convert" sub-command
@@ -35,8 +35,10 @@ sub process_inputs { # via wq_do
 	my $lei = $self->{lei};
 	delete $lei->{1};
 	delete $self->{wcb}; # commit
-	my $nr = delete($lei->{-nr_write}) // 0;
-	$lei->qerr("# converted $nr messages");
+	my $nr_w = delete($lei->{-nr_write}) // 0;
+	my $d = (delete($lei->{-nr_seen}) // 0) - $nr_w;
+	$d = $d ? " ($d duplicates)" : '';
+	$lei->qerr("# converted $nr_w messages$d");
 }
 
 sub lei_convert { # the main "lei convert" method
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 3c5e7e59..2aa3977e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir
@@ -197,6 +197,7 @@ sub _mbox_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($buf, $smsg, $eml) = @_;
 		$eml //= PublicInbox::Eml->new($buf);
+		++$lei->{-nr_seen};
 		return if $dedupe->is_dup($eml, $smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		$smsg->{-recent} = 1 if $set_recent;
@@ -291,6 +292,8 @@ sub _maildir_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$dst // return $lei->fail; # dst may be undef-ed in last run
+
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml //
 						PublicInbox::Eml->new($$bref),
 						$smsg);
@@ -317,6 +320,8 @@ sub _imap_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$mic // return $lei->fail; # mic may be undef-ed in last run
+
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml //
 						PublicInbox::Eml->new($$bref),
 						$smsg);
@@ -360,6 +365,7 @@ sub _v2_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$eml //= PublicInbox::Eml->new($bref);
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml, $smsg);
 		$lei->{v2w}->wq_do('add', $eml); # V2Writable->add
 		++$lei->{-nr_write};
@@ -792,9 +798,10 @@ sub wq_atexit_child {
 	my $lei = $self->{lei};
 	delete $self->{wcb};
 	$lei->{ale}->git->async_wait_all;
-	my $nr = delete($lei->{-nr_write}) or return;
+	my ($nr_w, $nr_s) = delete(@$lei{qw(-nr_write -nr_seen)});
+	$nr_s or return;
 	return if $lei->{early_mua} || !$lei->{-progress} || !$lei->{pkt_op_p};
-	$lei->{pkt_op_p}->pkt_do('l2m_progress', $nr);
+	$lei->{pkt_op_p}->pkt_do('l2m_progress', $nr_w, $nr_s);
 }
 
 # runs on a 1s timer in lei-daemon
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 41e79856..6f877019 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Combine any combination of PublicInbox::Search,
@@ -163,8 +163,9 @@ sub mset_progress {
 }
 
 sub l2m_progress {
-	my ($lei, $nr) = @_;
-	$lei->{-nr_write} += $nr;
+	my ($lei, $nr_write, $nr_seen) = @_;
+	$lei->{-nr_write} += $nr_write;
+	$lei->{-nr_seen} += $nr_seen;
 }
 
 sub query_one_mset { # for --threads and l2m w/o sort
@@ -447,13 +448,16 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$?
 		}
 		if ($lei->{-progress}) {
 			my $tot = $lei->{-mset_total} // 0;
-			my $nr = $lei->{-nr_write} // 0;
+			my $nr_w = $lei->{-nr_write} // 0;
+			my $d = ($lei->{-nr_seen} // 0) - $nr_w;
+			my $x = "$tot matches";
+			$x .= ", $d duplicates" if $d;
 			if ($l2m) {
-				my $m = "# $nr written to " .
-					"$lei->{ovv}->{dst} ($tot matches)";
-				$nr ? $lei->qfin($m) : $lei->qerr($m);
+				my $m = "# $nr_w written to " .
+					"$lei->{ovv}->{dst} ($x)";
+				$nr_w ? $lei->qfin($m) : $lei->qerr($m);
 			} else {
-				$lei->qerr("# $tot matches");
+				$lei->qerr("# $x");
 			}
 		}
 		$lei->start_mua if $l2m && !$l2m->lock_free;

      parent reply	other threads:[~2022-07-07  9:40 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-07  9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
2022-07-07  9:40 ` [PATCH 1/2] lei_xsearch: simplify lei/store import check Eric Wong
2022-07-07  9:40 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220707094030.1185793-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).