unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] lei: minor diagnostic improvement
@ 2022-07-07  9:40 Eric Wong
  2022-07-07  9:40 ` [PATCH 1/2] lei_xsearch: simplify lei/store import check Eric Wong
  2022-07-07  9:40 ` [PATCH 2/2] lei: track seen messages to note duplicates Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2022-07-07  9:40 UTC (permalink / raw)
  To: meta

Still trying to consistently reproduce the source of missing
messages.  It may be isolated to HTTP(S) remotes, or not, but
noting the number of seen vs. written messages ought to be a
reasonable start.

Patch 1/2 is just a tiny simplification I noticed along the way.

Eric Wong (2):
  lei_xsearch: simplify lei/store import check
  lei: track seen messages to note duplicates

 lib/PublicInbox/LeiConvert.pm |  8 +++++---
 lib/PublicInbox/LeiToMail.pm  | 13 ++++++++++---
 lib/PublicInbox/LeiXSearch.pm | 24 ++++++++++++++----------
 3 files changed, 29 insertions(+), 16 deletions(-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] lei_xsearch: simplify lei/store import check
  2022-07-07  9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
@ 2022-07-07  9:40 ` Eric Wong
  2022-07-07  9:40 ` [PATCH 2/2] lei: track seen messages to note duplicates Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2022-07-07  9:40 UTC (permalink / raw)
  To: meta

There's no need to check for two fields when one will suffice.
---
 lib/PublicInbox/LeiXSearch.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2958d3f9..41e79856 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -285,7 +285,7 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
 		my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
 		if (ref($res) eq ref($smsg)) { # totally new message
 			$smsg = $res;
-			$self->{-imported} = 1;
+			$self->{-sto_imported} = 1;
 		}
 		$smsg->{kw} = $kw; # short-circuit xsmsg_vmd
 	}
@@ -376,7 +376,7 @@ sub query_remote_mboxrd {
 		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
 						$lei, $each_smsg);
-		if ($self->{import_sto} && delete($self->{-imported})) {
+		if (delete($self->{-sto_imported})) {
 			my $wait = $self->{import_sto}->wq_do('done');
 		}
 		$reap_curl->join;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] lei: track seen messages to note duplicates
  2022-07-07  9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
  2022-07-07  9:40 ` [PATCH 1/2] lei_xsearch: simplify lei/store import check Eric Wong
@ 2022-07-07  9:40 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2022-07-07  9:40 UTC (permalink / raw)
  To: meta

This may help track down deduplication or other bugs in lei
that occasionally cause messages to go missing.

Link: https://public-inbox.org/meta/CAL_JsqJH8xx_2NyZffNsRXbGXiv3kjmCETvKXt3Yfb0uToLm9Q@mail.gmail.com/
---
 lib/PublicInbox/LeiConvert.pm |  8 +++++---
 lib/PublicInbox/LeiToMail.pm  | 13 ++++++++++---
 lib/PublicInbox/LeiXSearch.pm | 20 ++++++++++++--------
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 906f3026..59af40de 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # front-end for the "lei convert" sub-command
@@ -35,8 +35,10 @@ sub process_inputs { # via wq_do
 	my $lei = $self->{lei};
 	delete $lei->{1};
 	delete $self->{wcb}; # commit
-	my $nr = delete($lei->{-nr_write}) // 0;
-	$lei->qerr("# converted $nr messages");
+	my $nr_w = delete($lei->{-nr_write}) // 0;
+	my $d = (delete($lei->{-nr_seen}) // 0) - $nr_w;
+	$d = $d ? " ($d duplicates)" : '';
+	$lei->qerr("# converted $nr_w messages$d");
 }
 
 sub lei_convert { # the main "lei convert" method
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 3c5e7e59..2aa3977e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Writes PublicInbox::Eml objects atomically to a mbox variant or Maildir
@@ -197,6 +197,7 @@ sub _mbox_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($buf, $smsg, $eml) = @_;
 		$eml //= PublicInbox::Eml->new($buf);
+		++$lei->{-nr_seen};
 		return if $dedupe->is_dup($eml, $smsg);
 		$lse->xsmsg_vmd($smsg) if $lse;
 		$smsg->{-recent} = 1 if $set_recent;
@@ -291,6 +292,8 @@ sub _maildir_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$dst // return $lei->fail; # dst may be undef-ed in last run
+
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml //
 						PublicInbox::Eml->new($$bref),
 						$smsg);
@@ -317,6 +320,8 @@ sub _imap_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$mic // return $lei->fail; # mic may be undef-ed in last run
+
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml //
 						PublicInbox::Eml->new($$bref),
 						$smsg);
@@ -360,6 +365,7 @@ sub _v2_write_cb ($$) {
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
 		$eml //= PublicInbox::Eml->new($bref);
+		++$lei->{-nr_seen};
 		return if $dedupe && $dedupe->is_dup($eml, $smsg);
 		$lei->{v2w}->wq_do('add', $eml); # V2Writable->add
 		++$lei->{-nr_write};
@@ -792,9 +798,10 @@ sub wq_atexit_child {
 	my $lei = $self->{lei};
 	delete $self->{wcb};
 	$lei->{ale}->git->async_wait_all;
-	my $nr = delete($lei->{-nr_write}) or return;
+	my ($nr_w, $nr_s) = delete(@$lei{qw(-nr_write -nr_seen)});
+	$nr_s or return;
 	return if $lei->{early_mua} || !$lei->{-progress} || !$lei->{pkt_op_p};
-	$lei->{pkt_op_p}->pkt_do('l2m_progress', $nr);
+	$lei->{pkt_op_p}->pkt_do('l2m_progress', $nr_w, $nr_s);
 }
 
 # runs on a 1s timer in lei-daemon
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 41e79856..6f877019 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # Combine any combination of PublicInbox::Search,
@@ -163,8 +163,9 @@ sub mset_progress {
 }
 
 sub l2m_progress {
-	my ($lei, $nr) = @_;
-	$lei->{-nr_write} += $nr;
+	my ($lei, $nr_write, $nr_seen) = @_;
+	$lei->{-nr_write} += $nr_write;
+	$lei->{-nr_seen} += $nr_seen;
 }
 
 sub query_one_mset { # for --threads and l2m w/o sort
@@ -447,13 +448,16 @@ Error closing $lei->{ovv}->{dst}: \$!=$! \$?=$?
 		}
 		if ($lei->{-progress}) {
 			my $tot = $lei->{-mset_total} // 0;
-			my $nr = $lei->{-nr_write} // 0;
+			my $nr_w = $lei->{-nr_write} // 0;
+			my $d = ($lei->{-nr_seen} // 0) - $nr_w;
+			my $x = "$tot matches";
+			$x .= ", $d duplicates" if $d;
 			if ($l2m) {
-				my $m = "# $nr written to " .
-					"$lei->{ovv}->{dst} ($tot matches)";
-				$nr ? $lei->qfin($m) : $lei->qerr($m);
+				my $m = "# $nr_w written to " .
+					"$lei->{ovv}->{dst} ($x)";
+				$nr_w ? $lei->qfin($m) : $lei->qerr($m);
 			} else {
-				$lei->qerr("# $tot matches");
+				$lei->qerr("# $x");
 			}
 		}
 		$lei->start_mua if $l2m && !$l2m->lock_free;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-07-07  9:40 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-07  9:40 [PATCH 0/2] lei: minor diagnostic improvement Eric Wong
2022-07-07  9:40 ` [PATCH 1/2] lei_xsearch: simplify lei/store import check Eric Wong
2022-07-07  9:40 ` [PATCH 2/2] lei: track seen messages to note duplicates Eric Wong

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).