From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 03D4D1F934 for ; Mon, 1 Feb 2021 08:28:34 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 01/21] lei: more consistent dedupe and ovv_buf init Date: Sun, 31 Jan 2021 22:28:13 -1000 Message-Id: <20210201082833.3293-2-e@80x24.org> In-Reply-To: <20210201082833.3293-1-e@80x24.org> References: <20210201082833.3293-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This fixes "--dedupe none" with Maildir where we don't create the object at all. --- lib/PublicInbox/LeiDedupe.pm | 4 ++-- lib/PublicInbox/LeiOverview.pm | 18 ++++++++++-------- lib/PublicInbox/LeiToMail.pm | 3 +-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index 3f478aa4..e3ae8e33 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -103,8 +103,8 @@ sub new { bless [ $skv, undef, undef, $m ], $cls; } -# returns true on unseen messages according to the deduplication strategy, -# returns false if seen +# returns true on seen messages according to the deduplication strategy, +# returns false if unseen sub is_dup { my ($self, $eml, $oid) = @_; !$self->[1]->($eml, $oid); diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index c67e2747..fa041457 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -92,13 +92,14 @@ sub new { ovv_out_lk_init($self); } } - if (!$json) { + if ($json) { + $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei); + } else { # default to the cheapest sort since MUA usually resorts $lei->{opt}->{'sort'} //= 'docid' if $dst ne '/dev/stdout'; $lei->{l2m} = eval { PublicInbox::LeiToMail->new($lei) }; return $lei->fail($@) if $@; } - $lei->{dedupe} //= PublicInbox::LeiDedupe->new($lei); $self; } @@ -201,15 +202,19 @@ sub _json_pretty { sub ovv_each_smsg_cb { # runs in wq worker usually my ($self, $lei, $ibxish) = @_; - my $json; + my ($json, $dedupe); $lei->{1}->autoflush(1); - my $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; if (my $pkg = $self->{json}) { $json = $pkg->new; $json->utf8->canonical; $json->ascii(1) if $lei->{opt}->{ascii}; } - my $l2m = $lei->{l2m} or $dedupe->prepare_dedupe; + my $l2m = $lei->{l2m}; + if (!$l2m) { + $dedupe = $lei->{dedupe} // die 'BUG: {dedupe} missing'; + $dedupe->prepare_dedupe; + } + $lei->{ovv_buf} = \(my $buf = '') if !$l2m; if ($l2m && !$ibxish) { # remote https?:// mboxrd delete $l2m->{-wq_s1}; my $g2m = $l2m->can('git_to_mail'); @@ -241,7 +246,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually my $git = $ibxish->git; # (LeiXSearch|Inbox|ExtSearch)->git $self->{git} = $git; # for ovv_atexit_child my $g2m = $l2m->can('git_to_mail'); - $dedupe->prepare_dedupe; sub { my ($smsg, $mitem) = @_; $smsg->{pct} = get_pct($mitem) if $mitem; @@ -249,7 +253,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually }; } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; - $lei->{ovv_buf} = \(my $buf = ''); sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); @@ -273,7 +276,6 @@ sub ovv_each_smsg_cb { # runs in wq worker usually } } elsif ($json) { my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL - $lei->{ovv_buf} = \(my $buf = ''); sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 61b546b5..244bfb67 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -323,7 +323,7 @@ sub _buf2maildir { sub _maildir_write_cb ($$) { my ($self, $lei) = @_; my $dedupe = $lei->{dedupe}; - $dedupe->prepare_dedupe; + $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; @@ -464,7 +464,6 @@ sub write_mail { # via ->wq_do my $wcb = $self->{wcb} //= do { # first message my %sig = $lei->atfork_child_wq($self); @SIG{keys %sig} = values %sig; # not local - $lei->{dedupe}->prepare_dedupe; $self->write_cb($lei); }; my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir);