From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 7D9DA1FA13 for ; Fri, 21 May 2021 10:28:33 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 6/8] lei export-kw: new command to export keywords to Maildirs Date: Fri, 21 May 2021 10:28:30 +0000 Message-Id: <20210521102832.10784-7-e@80x24.org> In-Reply-To: <20210521102832.10784-1-e@80x24.org> References: <20210521102832.10784-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: IMAP will eventually be supported. --- MANIFEST | 2 + lib/PublicInbox/LEI.pm | 4 + lib/PublicInbox/LeiExportKw.pm | 180 +++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiMailSync.pm | 10 ++ lib/PublicInbox/LeiSearch.pm | 14 +++ lib/PublicInbox/LeiToMail.pm | 8 +- lib/PublicInbox/MdirReader.pm | 14 +++ t/lei-export-kw.t | 35 +++++++ t/mdir_reader.t | 5 + 9 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/LeiExportKw.pm create mode 100644 t/lei-export-kw.t diff --git a/MANIFEST b/MANIFEST index 684128aa..2d1ad5c3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -202,6 +202,7 @@ lib/PublicInbox/LeiConvert.pm lib/PublicInbox/LeiCurl.pm lib/PublicInbox/LeiDedupe.pm lib/PublicInbox/LeiEditSearch.pm +lib/PublicInbox/LeiExportKw.pm lib/PublicInbox/LeiExternal.pm lib/PublicInbox/LeiForgetSearch.pm lib/PublicInbox/LeiHelp.pm @@ -408,6 +409,7 @@ t/iso-2202-jp.eml t/kqnotify.t t/lei-convert.t t/lei-daemon.t +t/lei-export-kw.t t/lei-externals.t t/lei-import-http.t t/lei-import-imap.t diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 15680fe3..628908b5 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -243,6 +243,10 @@ our %CMD = ( # sorted in order of importance/use: qw(stdin| offset=i recursive|r exclude=s include|I=s lock=s@ in-format|F=s kw! verbose|v+ incremental! mail-sync!), qw(no-torsocks torsocks=s), PublicInbox::LeiQuery::curl_opt(), @c_opt ], + +'export-kw' => [ 'LOCATION...|--all', + 'one-time export of keywords of sync sources', + qw(all:s mode=s), @c_opt ], 'convert' => [ 'LOCATION...|--stdin', 'one-time conversion from URL or filesystem to another format', qw(stdin| in-format|F=s out-format|f=s output|mfolder|o=s lock=s@ kw!), diff --git a/lib/PublicInbox/LeiExportKw.pm b/lib/PublicInbox/LeiExportKw.pm new file mode 100644 index 00000000..db4f7441 --- /dev/null +++ b/lib/PublicInbox/LeiExportKw.pm @@ -0,0 +1,180 @@ +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ + +# front-end for the "lei export-kw" sub-command +package PublicInbox::LeiExportKw; +use strict; +use v5.10.1; +use parent qw(PublicInbox::IPC PublicInbox::LeiInput); +use Errno qw(EEXIST ENOENT); + +sub export_kw_md { # LeiMailSync->each_src callback + my ($oidbin, $id, $self, $mdir) = @_; + my $oidhex = unpack('H*', $oidbin); + my $sto_kw = $self->{lse}->oid_keywords($oidhex) or return; + my $bn = $$id; + my ($md_kw, $unknown, @try); + if ($bn =~ s/:2,([a-zA-Z]*)\z//) { + ($md_kw, $unknown) = PublicInbox::MdirReader::flags2kw($1); + @try = qw(cur new); + } else { + $unknown = []; + @try = qw(new cur); + } + if ($self->{-merge_kw} && $md_kw) { # merging keywords is the default + @$sto_kw{keys %$md_kw} = values(%$md_kw); + } + $bn .= ':2,'. + PublicInbox::LeiToMail::kw2suffix([keys %$sto_kw], @$unknown); + my $dst = "$mdir/cur/$bn"; + my @fail; + for my $d (@try) { + my $src = "$mdir/$d/$$id"; + next if $src eq $dst; + + # we use link(2) + unlink(2) since rename(2) may + # inadvertently clobber if the "uniquefilename" part wasn't + # actually unique. + if (link($src, $dst)) { # success + # unlink(2) may ENOENT from parallel invocation, + # ignore it, but not other serious errors + if (!unlink($src) and $! != ENOENT) { + $self->{lei}->child_error(1, + "E: unlink($src): $!"); + } + $self->{lms}->mv_src("maildir:$mdir", + $oidbin, $id, $bn) or die; + return; # success anyways if link(2) worked + } + if ($! == ENOENT && !-e $src) { # some other process moved it + $self->{lms}->clear_src("maildir:$mdir", $id); + next; + } + push @fail, $src if $! != EEXIST; + } + return unless @fail; + # both tries failed + my $e = $!; + my $orig = '['.join('|', @fail).']'; + $self->{lei}->child_error(1, "link($orig, $dst) ($oidhex): $e"); +} + +# overrides PublicInbox::LeiInput::input_path_url +sub input_path_url { + my ($self, $input, @args) = @_; + my $lms = $self->{lms} //= $self->{lse}->lms; + $lms->lms_begin; + if ($input =~ s/\Amaildir://i) { + require PublicInbox::LeiToMail; # kw2suffix + $lms->each_src("maildir:$input", \&export_kw_md, $self, $input); + } + $lms->lms_commit; +} + +sub lei_export_kw { + my ($lei, @folders) = @_; + my $sto = $lei->_lei_store or return $lei->fail(<search; + my $lms = $lse->lms or return $lei->fail(<{opt}; + my $all = $opt->{all}; + my @all = $lms->folders; + if (defined $all) { # --all= + my %x = map { $_ => $_ } split(/,/, $all); + my @ok = grep(defined, delete(@x{qw(local remote), ''})); + my @no = keys %x; + if (@no) { + @no = (join(',', @no)); + return $lei->fail(<fail("--all=$all not understood"); + } else { + @inc = @all; + } + for (@inc) { + push(@folders, $_) unless $seen{$_}++; + } + } + return $lei->fail(< 1 } @all; + my @no; + for (@folders) { + next if $all{$_}; # ok + if (-d "$_/new" && -d "$_/cur") { + my $d = 'maildir:'.$lei->rel2abs($_); + push(@no, $_) unless $all{$d}; + $_ = $d; + } else { + push @no, $_; + } + } + my $no = join("\n\t", @no); + return $lei->fail(< $lse }, __PACKAGE__; + $lei->{opt}->{'mail-sync'} = 1; # for prepare_inputs + $self->prepare_inputs($lei, \@folders) or return; + my $j = $opt->{jobs} // scalar(@{$self->{inputs}}) || 1; + if (my @ro = grep(!/\A(?:maildir|imaps?):/, @folders)) { + return $lei->fail("cannot export to read-only folders: @ro"); + } + if (my $net = $lei->{net}) { + require PublicInbox::NetWriter; + bless $net, 'PublicInbox::NetWriter'; + } + undef $lms; + my $m = $opt->{mode} // 'merge'; + if ($m eq 'merge') { # default + $self->{-merge_kw} = 1; + } elsif ($m eq 'set') { + } else { + return $lei->fail(<{auth}->op_merge($ops, $self) if $lei->{auth}; + $self->{-wq_nr_workers} = $j // 1; # locked + (my $op_c, $ops) = $lei->workers_start($self, $j, $ops); + $lei->{wq1} = $self; + $lei->{-err_type} = 'non-fatal'; + net_merge_all_done($self) unless $lei->{auth}; + $op_c->op_wait_event($ops); # calls net_merge_all_done if $lei->{auth} +} + +sub _complete_export_kw { + my ($lei, @argv) = @_; + my $sto = $lei->_lei_store or return; + my $lms = $sto->search->lms or return; + my $match_cb = $lei->complete_url_prepare(\@argv); + map { $match_cb->($_) } $lms->folders; +} + +no warnings 'once'; + +*ipc_atfork_child = \&PublicInbox::LeiInput::input_only_atfork_child; +*net_merge_all_done = \&PublicInbox::LeiInput::input_only_net_merge_all_done; + +# the following works even when LeiAuth is lazy-loaded +*net_merge_all = \&PublicInbox::LeiAuth::net_merge_all; + +1; diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index 3bada42d..32e17c65 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -138,6 +138,16 @@ DELETE FROM blob2num WHERE fid = ? AND uid = ? $sth->execute($fid, $id); } +# Maildir-only +sub mv_src { + my ($self, $folder, $oidbin, $id, $newbn) = @_; + my $fid = $self->{fmap}->{$folder} //= _fid_for($self, $folder, 1); + my $sth = $self->{dbh}->prepare_cached(<<''); +UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ? + + $sth->execute($newbn, $fid, $oidbin, $$id); +} + # read-only, iterates every oidbin + UID or name for a given folder sub each_src { my ($self, $folder, $cb, @args) = @_; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index fb19229f..9297d060 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -27,6 +27,20 @@ sub msg_keywords { wantarray ? sort(keys(%$kw)) : $kw; } +# returns undef if blob is unknown +sub oid_keywords { + my ($self, $oidhex) = @_; + my @num = $self->over->blob_exists($oidhex) or return; + my $xdb = $self->xdb; # set {nshard}; + my %kw; + for my $num (@num) { # there should only be one... + my $doc = $xdb->get_document(num2docid($self, $num)); + my $x = xap_terms('K', $doc); + %kw = (%kw, %$x); + } + \%kw; +} + # lookup keywords+labels for external messages sub xsmsg_vmd { my ($self, $smsg, $want_label) = @_; diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 0cbdff8b..96a1f881 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -243,10 +243,14 @@ sub _rand () { sprintf('%x,%x,%x,%x', rand(0xffffffff), time, $$, ++$seq); } +sub kw2suffix ($;@) { + my $kw = shift; + join('', sort(map { $kw2char{$_} // () } @$kw, @_)); +} + sub _buf2maildir { my ($dst, $buf, $smsg) = @_; my $kw = $smsg->{kw} // []; - my $sfx = join('', sort(map { $kw2char{$_} // () } @$kw)); my $rand = ''; # chosen by die roll :P my ($tmp, $fh, $base, $ok); my $common = $smsg->{blob} // _rand; @@ -263,7 +267,7 @@ sub _buf2maildir { $dst .= 'cur/'; $rand = ''; do { - $base = $rand.$common.':2,'.$sfx + $base = $rand.$common.':2,'.kw2suffix($kw); } while (!($ok = link($tmp, $dst.$base)) && $!{EEXIST} && ($rand = _rand.',')); die "link($tmp, $dst$base): $!" unless $ok; diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm index 7a0641fb..304be63d 100644 --- a/lib/PublicInbox/MdirReader.pm +++ b/lib/PublicInbox/MdirReader.pm @@ -86,4 +86,18 @@ sub maildir_each_eml { sub new { bless {}, __PACKAGE__ } +sub flags2kw ($) { + my @unknown; + my %kw; + for (split(//, $_[0])) { + my $k = $c2kw{$_}; + if (defined($k)) { + $kw{$k} = 1; + } else { + push @unknown, $_; + } + } + (\%kw, \@unknown); +} + 1; diff --git a/t/lei-export-kw.t b/t/lei-export-kw.t new file mode 100644 index 00000000..9531949a --- /dev/null +++ b/t/lei-export-kw.t @@ -0,0 +1,35 @@ +#!perl -w +# Copyright (C) 2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +use File::Copy qw(cp); +use File::Path qw(make_path); +require_mods(qw(lei -imapd Mail::IMAPClient)); +my ($tmpdir, $for_destroy) = tmpdir; +my ($ro_home, $cfg_path) = setup_public_inboxes; +my $expect = eml_load('t/data/0001.patch'); +test_lei({ tmpdir => $tmpdir }, sub { + my $home = $ENV{HOME}; + my $md = "$home/md"; + make_path("$md/new", "$md/cur", "$md/tmp"); + cp('t/data/0001.patch', "$md/new/y") or xbail "cp $md $!"; + cp('t/data/message_embed.eml', "$md/cur/x:2,S") or xbail "cp $md $!"; + lei_ok qw(index -q), $md; + lei_ok qw(tag t/data/0001.patch +kw:seen); + lei_ok qw(export-kw --all=local); + ok(!-e "$md/new/y", 'original gone'); + is_deeply(eml_load("$md/cur/y:2,S"), $expect, + "`seen' kw exported"); + + lei_ok qw(tag t/data/0001.patch +kw:answered); + lei_ok qw(export-kw --all=local); + ok(!-e "$md/cur/y:2,S", 'seen-only file gone'); + is_deeply(eml_load("$md/cur/y:2,RS"), $expect, "`R' added"); + + lei_ok qw(tag t/data/0001.patch -kw:answered -kw:seen); + lei_ok qw(export-kw --mode=set --all=local); + ok(!-e "$md/cur/y:2,RS", 'seen+answered file gone'); + is_deeply(eml_load("$md/cur/y:2,"), $expect, 'no keywords left'); +}); + +done_testing; diff --git a/t/mdir_reader.t b/t/mdir_reader.t index 51b38af4..c927e1a7 100644 --- a/t/mdir_reader.t +++ b/t/mdir_reader.t @@ -19,4 +19,9 @@ is(maildir_path_flags('/path/to/foo:2,'), '', 'no flags in path'); use_ok 'PublicInbox::InboxWritable', qw(eml_from_path); is(eml_from_path('.'), undef, 'eml_from_path fails on directory'); +is_deeply([PublicInbox::MdirReader::flags2kw('S')], [{ 'seen' => 1 }, []], + "`seen' kw set from flag"); +is_deeply([PublicInbox::MdirReader::flags2kw('Su')], [{ 'seen' => 1 }, ['u']], + 'unknown flag ignored'); + done_testing;