From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] lei up: fix dedupe with remote externals on Maildir + IMAP
Date: Mon, 3 May 2021 20:57:31 +0000 [thread overview]
Message-ID: <20210503205731.8747-1-e@80x24.org> (raw)
LeiToMail Maildir and IMAP write callbacks need to account for
the caller-supplied smsg. We'll also make better use of the
user-supplied smsg object by ensuring blob deduplication happens
ASAP.
Fixes: e76683309ca4f254 ("lei <q|up>: distinguish between mset and l2m counts")
---
lib/PublicInbox/LeiSavedSearch.pm | 15 ++++++++-------
lib/PublicInbox/LeiToMail.pm | 6 ++++--
t/lei-q-remote-import.t | 6 ++++++
3 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index 8177c98e..92ced28b 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -170,23 +170,24 @@ sub cfg_set { # called by LeiXSearch
sub is_dup {
my ($self, $eml, $smsg) = @_;
my $oidx = $self->{oidx} // die 'BUG: no {oidx}';
- my $blob = $smsg ? $smsg->{blob} : undef;
- my $lk = $self->lock_for_scope_fast;
- return 1 if $blob && $oidx->blob_exists($blob);
+ my $lk;
if ($self->{-dedupe_mid}) {
+ $lk //= $self->lock_for_scope_fast;
for my $mid (@{mids_for_index($eml)}) {
my ($id, $prv);
return 1 if $oidx->next_by_mid($mid, \$id, \$prv);
}
}
+ my $blob = $smsg ? $smsg->{blob} : git_sha(1, $eml)->hexdigest;
+ $lk //= $self->lock_for_scope_fast;
+ return 1 if $oidx->blob_exists($blob);
if (my $xoids = PublicInbox::LeiSearch::xoids_for($self, $eml, 1)) {
for my $docid (values %$xoids) {
$oidx->add_xref3($docid, -1, $blob, '.');
}
$oidx->commit_lazy;
if ($self->{-dedupe_oid}) {
- $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
- exists $xoids->{$smsg->{blob}} ? 1 : undef;
+ exists $xoids->{$blob} ? 1 : undef;
} else {
1;
}
@@ -197,11 +198,11 @@ sub is_dup {
$smsg->{bytes} = 0;
$smsg->populate($eml);
}
+ $smsg->{blob} //= $blob;
$oidx->begin_lazy;
$smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
- $smsg->{blob} //= git_sha(1, $eml)->hexdigest;
$oidx->add_overview($eml, $smsg);
- $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
+ $oidx->add_xref3($smsg->{num}, -1, $blob, '.');
$oidx->commit_lazy;
undef;
}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 71acf952..64061788 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -283,7 +283,8 @@ sub _maildir_write_cb ($$) {
my ($bref, $smsg, $eml) = @_;
$dst // return $lei->fail; # dst may be undef-ed in last run
return if $dedupe && $dedupe->is_dup($eml //
- PublicInbox::Eml->new($$bref));
+ PublicInbox::Eml->new($$bref),
+ $smsg);
$lse->xsmsg_vmd($smsg) if $lse;
my $n = _buf2maildir($dst, $bref // \($eml->as_string), $smsg);
$sto->ipc_do('set_sync_info', $smsg->{blob}, $out, $n) if $sto;
@@ -305,7 +306,8 @@ sub _imap_write_cb ($$) {
my ($bref, $smsg, $eml) = @_;
$mic // return $lei->fail; # mic may be undef-ed in last run
return if $dedupe && $dedupe->is_dup($eml //
- PublicInbox::Eml->new($$bref));
+ PublicInbox::Eml->new($$bref),
+ $smsg);
$lse->xsmsg_vmd($smsg) if $lse;
my $uid = eval { $append->($mic, $folder, $bref, $smsg, $eml) };
if (my $err = $@) {
diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t
index 32c5172b..80067061 100644
--- a/t/lei-q-remote-import.t
+++ b/t/lei-q-remote-import.t
@@ -91,5 +91,11 @@ EOF
lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com));
like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm,
'--import-before imported totally unseen message');
+
+ lei_ok(qw(q --save z:0.. -o), "$ENV{HOME}/md", '--only', $url);
+ my @f = glob("$ENV{HOME}/md/*/*");
+ lei_ok('up', "$ENV{HOME}/md");
+ is_deeply(\@f, [ glob("$ENV{HOME}/md/*/*") ],
+ 'lei up remote dedupe works on maildir');
});
done_testing;
reply other threads:[~2021-05-03 20:57 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210503205731.8747-1-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).