From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 6C8881F9FE for ; Thu, 4 Mar 2021 09:03:16 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/6] lei q: support --import-augment for IMAP Date: Thu, 4 Mar 2021 17:03:11 +0800 Message-Id: <20210304090316.9568-2-e@80x24.org> In-Reply-To: <20210304090316.9568-1-e@80x24.org> References: <20210304090316.9568-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: IMAP is similar to Maildir and we can now preserve keyword updates done on IMAP folders. --- lib/PublicInbox/LeiToMail.pm | 48 ++++++++++++++++++++++-------------- lib/PublicInbox/NetReader.pm | 9 +++++-- lib/PublicInbox/NetWriter.pm | 41 ++++++++++++++++++++++++++---- xt/net_writer-imap.t | 36 ++++++++++++++++++++++++--- 4 files changed, 105 insertions(+), 29 deletions(-) diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 3420b06e..b3228a59 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -267,6 +267,17 @@ sub _mbox_write_cb ($$) { } } +sub update_kw_maybe ($$$$) { + my ($lei, $lse, $eml, $kw) = @_; + return unless $lse; + my $x = $lse->kw_changed($eml, $kw); + if ($x) { + $lei->{sto}->ipc_do('set_eml', $eml, @$kw); + } elsif (!defined($x)) { + # TODO: xkw + } +} + sub _augment_or_unlink { # maildir_each_eml cb my ($f, $kw, $eml, $lei, $lse, $mod, $shard, $unlink) = @_; if ($mod) { @@ -276,14 +287,7 @@ sub _augment_or_unlink { # maildir_each_eml cb $1 : sha256_hex($f); my $recno = hex(substr($hex, 0, 8)); return if ($recno % $mod) != $shard; - if ($lse) { - my $x = $lse->kw_changed($eml, $kw); - if ($x) { - $lei->{sto}->ipc_do('set_eml', $eml, @$kw); - } elsif (!defined($x)) { - # TODO: xkw - } - } + update_kw_maybe($lei, $lse, $eml, $kw); } $unlink ? unlink($f) : _augment($eml, $lei); } @@ -446,26 +450,32 @@ sub _do_augment_maildir { } } -sub _post_augment_maildir { - my ($self, $lei) = @_; - $lei->{opt}->{'import-augment'} or return; - my $wait = $lei->{sto}->ipc_do('checkpoint', 1); -} - -sub _augment_imap { # PublicInbox::NetReader::imap_each cb - my ($url, $uid, $kw, $eml, $lei) = @_; - _augment($eml, $lei); +sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb + my ($url, $uid, $kw, $eml, $lei, $lse, $delete_mic) = @_; + update_kw_maybe($lei, $lse, $eml, $kw); + if ($delete_mic) { + $lei->{net}->imap_delete_1($url, $uid, $delete_mic); + } else { + _augment($eml, $lei); + } } sub _do_augment_imap { my ($self, $lei) = @_; my $net = $lei->{net}; + my $lse = $lei->{sto}->search if $lei->{opt}->{'import-augment'}; if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { - $net->imap_each($self->{uri}, \&_augment_imap, $lei); + $net->imap_each($self->{uri}, \&_imap_augment_or_delete, + $lei, $lse); $dedupe->pause_dedupe; } + } elsif ($lse) { + my $delete_mic; + $net->imap_each($self->{uri}, \&_imap_augment_or_delete, + $lei, $lse, \$delete_mic); + $delete_mic->expunge if $delete_mic; } elsif (!$self->{-wq_worker_nr}) { # undef or 0 # clobber existing IMAP folder $net->imap_delete_all($self->{uri}); @@ -539,6 +549,8 @@ sub do_augment { # slow, runs in wq worker # fast (spawn compressor or mkdir), runs in same process as pre_augment sub post_augment { my ($self, $lei, @args) = @_; + my $wait = $lei->{opt}->{'import-augment'} ? + $lei->{sto}->ipc_do('checkpoint', 1) : 0; # _post_augment_mbox my $m = $self->can("_post_augment_$self->{base_type}") or return; $m->($self, $lei, @args); diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index 96d3b2ed..f5f71005 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -346,9 +346,14 @@ sub _imap_do_msg ($$$$$) { $$raw =~ s/\r\n/\n/sg; my $kw = []; for my $f (split(/ /, $flags)) { - my $k = $IMAPflags2kw{$f} // next; # TODO: X-Label? - push @$kw, $k; + if (my $k = $IMAPflags2kw{$f}) { + push @$kw, $k; + } elsif ($f eq "\\Recent") { # not in JMAP + } elsif ($self->{verbose}) { + warn "# unknown IMAP flag $f <$uri;uid=$uid>\n"; + } } + @$kw = sort @$kw; # for all UI/UX purposes my ($eml_cb, @args) = @{$self->{eml_each}}; $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args); } diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm index e26e9815..49ac02a6 100644 --- a/lib/PublicInbox/NetWriter.pm +++ b/lib/PublicInbox/NetWriter.pm @@ -13,27 +13,58 @@ my %IMAPkw2flags; @IMAPkw2flags{values %PublicInbox::NetReader::IMAPflags2kw} = keys %PublicInbox::NetReader::IMAPflags2kw; +sub kw2flags ($) { join(' ', map { $IMAPkw2flags{$_} } @{$_[0]}) } + sub imap_append { my ($mic, $folder, $bref, $smsg, $eml) = @_; $bref //= \($eml->as_string); $smsg //= bless {}, 'PublicInbox::Smsg'; bless($smsg, 'PublicInbox::Smsg') if ref($smsg) eq 'HASH'; $smsg->{ts} //= msg_timestamp($eml // PublicInbox::Eml->new($$bref)); - my @f = map { $IMAPkw2flags{$_} } @{$smsg->{kw}}; - $mic->append_string($folder, $$bref, "@f", $smsg->internaldate) or + my $f = kw2flags($smsg->{kw}); + $mic->append_string($folder, $$bref, $f, $smsg->internaldate) or die "APPEND $folder: $@"; } +sub mic_for_folder { + my ($self, $uri) = @_; + if (!ref($uri)) { + my $u = PublicInbox::URIimap->new($uri); + $_[1] = $uri = $u; + } + my $mic = $self->mic_get($uri) or die "E: not connected: $@"; + $mic->select($uri->mailbox) or return; + $mic; +} + sub imap_delete_all { my ($self, $url) = @_; - my $uri = PublicInbox::URIimap->new($url); + my $mic = mic_for_folder($self, my $uri = $url) or return; my $sec = $self->can('uri_section')->($uri); local $0 = $uri->mailbox." $sec"; - my $mic = $self->mic_get($uri) or die "E: not connected: $@"; - $mic->select($uri->mailbox) or return; # non-existent if ($mic->delete_message('1:*')) { $mic->expunge; } } +sub imap_delete_1 { + my ($self, $url, $uid, $delete_mic) = @_; + $$delete_mic //= mic_for_folder($self, my $uri = $url) or return; + $$delete_mic->delete_message($uid); +} + +sub imap_set_kw { + my ($self, $url, $uid, $kw) = @_; + my $mic = mic_for_folder($self, my $uri = $url) or return; + $mic->set_flag(kw2flags($kw), $uid); + $mic; # caller must ->expunge +} + +sub imap_unset_kw { + my ($self, $url, $uid, $kw) = @_; + my $mic = mic_for_folder($self, my $uri = $url) or return; + $mic->unset_flag(kw2flags($kw), $uid); + $mic; # caller must ->expunge +} + 1; diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t index da435926..c24fa993 100644 --- a/xt/net_writer-imap.t +++ b/xt/net_writer-imap.t @@ -91,7 +91,7 @@ my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg'; $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml')); $nwr->{quiet} = 1; my $imap_slurp_all = sub { - my ($u, $uid, $kw, $eml, $res) = @_; + my ($url, $uid, $kw, $eml, $res) = @_; push @$res, [ $kw, $eml ]; }; $nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []); @@ -138,10 +138,38 @@ test_lei(sub { $nwr->imap_each($folder_uri, $imap_slurp_all, my $empty = []); is(scalar(@$empty), 0, 'no results w/o augment'); - lei_ok qw(convert -F eml t/msg_iter-order.eml -o), $$folder_uri; + my $f = 't/utf8.eml'; # + $exp = eml_load($f); + lei_ok qw(convert -F eml -o), $$folder_uri, $f; + my (@uid, @res); + $nwr->imap_each($folder_uri, sub { + my ($u, $uid, $kw, $eml) = @_; + push @uid, $uid; + push @res, [ $kw, $eml ]; + }); + is_deeply(\@res, [ [ [], $exp ] ], 'converted to IMAP destination'); + is(scalar(@uid), 1, 'got one UID back'); + lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external); + is_deeply(json_utf8->decode($lei_out), [undef], + 'no results before import'); + + lei_ok qw(import -F eml), $f, \'import local copy w/o keywords'; + + $nwr->imap_set_kw($folder_uri, $uid[0], [ 'seen' ])->expunge + or BAIL_OUT "expunge $@"; + @res = (); + $nwr->imap_each($folder_uri, $imap_slurp_all, \@res); + is_deeply(\@res, [ [ ['seen'], $exp ] ], 'seen flag set') or + diag explain(\@res); + + lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_uri->as_string, + \'clobber folder but import flag'; $nwr->imap_each($folder_uri, $imap_slurp_all, $empty = []); - is_deeply($empty, [ [ [], eml_load('t/msg_iter-order.eml') ] ], - 'converted to IMAP destination'); + is_deeply($empty, [], 'clobbered folder'); + lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external); + $res = json_utf8->decode($lei_out)->[0]; + is_deeply([@$res{qw(m kw)}], ['', ['seen']], + 'kw set'); }); undef $cleanup; # remove temporary folder