From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9EA681F9E5 for ; Tue, 13 Apr 2021 10:54:46 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/5] lei_dedupe: adjust to prepare for saved searches Date: Tue, 13 Apr 2021 10:54:43 +0000 Message-Id: <20210413105446.7245-3-e@80x24.org> In-Reply-To: <20210413105446.7245-1-e@80x24.org> References: <20210413105446.7245-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: LeiSavedSearch will use a LeiDedupe-like internal API, so we won't have to make as many changes to callsites between saved and unsaved searches. --- lib/PublicInbox/LeiDedupe.pm | 16 ++++++++-------- lib/PublicInbox/LeiToMail.pm | 6 +++--- t/lei_dedupe.t | 11 +++++++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/lib/PublicInbox/LeiDedupe.pm b/lib/PublicInbox/LeiDedupe.pm index a62b3a7c..378f748e 100644 --- a/lib/PublicInbox/LeiDedupe.pm +++ b/lib/PublicInbox/LeiDedupe.pm @@ -41,8 +41,8 @@ sub smsg_hash ($) { sub dedupe_oid ($) { my ($skv) = @_; (sub { # may be called in a child process - my ($eml, $oid) = @_; - $skv->set_maybe(_oidbin($oid) // _regen_oid($eml), ''); + my ($eml, $oidhex) = @_; + $skv->set_maybe(_oidbin($oidhex) // _regen_oid($eml), ''); }, sub { my ($smsg) = @_; $skv->set_maybe(_oidbin($smsg->{blob}), ''); @@ -53,9 +53,9 @@ sub dedupe_oid ($) { sub dedupe_mid ($) { my ($skv) = @_; (sub { # may be called in a child process - my ($eml, $oid) = @_; - # TODO: lei will support non-public messages w/o Message-ID - my $mid = $eml->header_raw('Message-ID') // _oidbin($oid) // + my ($eml, $oidhex) = @_; + # lei supports non-public drafts w/o Message-ID + my $mid = $eml->header_raw('Message-ID') // _oidbin($oidhex) // content_hash($eml); $skv->set_maybe($mid, ''); }, sub { @@ -71,7 +71,7 @@ sub dedupe_mid ($) { sub dedupe_content ($) { my ($skv) = @_; (sub { # may be called in a child process - my ($eml) = @_; # oid = $_[1], ignored + my ($eml) = @_; # $oidhex = $_[1], ignored $skv->set_maybe(content_hash($eml), ''); }, sub { my ($smsg) = @_; @@ -104,8 +104,8 @@ sub new { # returns true on seen messages according to the deduplication strategy, # returns false if unseen sub is_dup { - my ($self, $eml, $oid) = @_; - !$self->[1]->($eml, $oid); + my ($self, $eml, $smsg) = @_; + !$self->[1]->($eml, $smsg ? $smsg->{blob} : undef); } sub is_smsg_dup { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 70164e40..7adbffe7 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -194,7 +194,7 @@ sub _mbox_write_cb ($$) { sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($buf); - return if $dedupe->is_dup($eml, $smsg->{blob}); + return if $dedupe->is_dup($eml, $smsg); $lse->xsmsg_vmd($smsg) if $lse; $buf = $eml2mbox->($eml, $smsg); return atomic_append($lei, $buf) if $atomic_append; @@ -280,7 +280,7 @@ sub _maildir_write_cb ($$) { $lse->xsmsg_vmd($smsg) if $lse; return _buf2maildir($dst, $buf, $smsg) if !$dedupe; $eml //= PublicInbox::Eml->new($$buf); # copy buf - return if $dedupe->is_dup($eml, $smsg->{blob}); + return if $dedupe->is_dup($eml, $smsg); undef $eml; _buf2maildir($dst, $buf, $smsg); } @@ -299,7 +299,7 @@ sub _imap_write_cb ($$) { $mic // return $lei->fail; # mic may be undef-ed in last run if ($dedupe) { $eml //= PublicInbox::Eml->new($$bref); # copy bref - return if $dedupe->is_dup($eml, $smsg->{blob}); + return if $dedupe->is_dup($eml, $smsg); } $lse->xsmsg_vmd($smsg) if $lse; eval { $imap_append->($mic, $folder, $bref, $smsg, $eml) }; diff --git a/t/lei_dedupe.t b/t/lei_dedupe.t index bcb06a0a..e1944d02 100644 --- a/t/lei_dedupe.t +++ b/t/lei_dedupe.t @@ -74,10 +74,13 @@ ok(!$dd->is_dup($different), 'different is_dup with mid dedupe (augment)'); $different->header_set('Status', 'RO'); ok($dd->is_dup($different), 'different seen with oid dedupe Status removed'); -ok(!$dd->is_dup($eml, '01d'), '1st is_dup with oid dedupe'); -ok($dd->is_dup($different, '01d'), 'different content ignored if oid matches'); -ok($dd->is_dup($eml, '01D'), 'case insensitive oid comparison :P'); -ok(!$dd->is_dup($eml, '01dbad'), 'case insensitive oid comparison :P'); +$smsg = { blob => '01d' }; +ok(!$dd->is_dup($eml, $smsg), '1st is_dup with oid dedupe'); +ok($dd->is_dup($different, $smsg), 'different content ignored if oid matches'); +$smsg->{blob} = uc($smsg->{blob}); +ok($dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P'); +$smsg->{blob} = '01dbad'; +ok(!$dd->is_dup($eml, $smsg), 'case insensitive oid comparison :P'); $smsg->{blob} = 'dead'; ok(!$dd->is_smsg_dup($smsg), 'smsg dedupe pass');