2/5 is the major milestone to me; it took me a long while to figure out and arrive at. PATCH 1/5 made things click, and <https://public-inbox.org/meta/20210320023800.1809-1-e@80x24.org/> ("lei_store: initialize IPC lock properly") was also needed :x Some follow-up will still be needed to support import, and also to deal with public-inbox-edit/purge in externals. Inotify + EVFILT_VNODE support should make things sweeter, too. Eric Wong (5): lei: All Local Externals: bare git dir for alternates lei q: support vmd for external-only messages lei q: put keywords on one line in --pretty output lei_to_mail: match mutt order of status headers lei: tie ALE lifetime to config file MANIFEST | 1 + lib/PublicInbox/LEI.pm | 15 +++++ lib/PublicInbox/LeiALE.pm | 111 +++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiOverview.pm | 12 +++- lib/PublicInbox/LeiQuery.pm | 1 + lib/PublicInbox/LeiSearch.pm | 37 +++++++---- lib/PublicInbox/LeiStore.pm | 82 ++++++++++++++---------- lib/PublicInbox/LeiToMail.pm | 38 ++++++----- lib/PublicInbox/LeiXSearch.pm | 44 +++---------- lib/PublicInbox/Lock.pm | 2 +- lib/PublicInbox/Over.pm | 22 ++++++- lib/PublicInbox/OverIdx.pm | 10 --- lib/PublicInbox/SearchIdx.pm | 3 + t/eml.t | 2 + t/lei-convert.t | 3 +- t/lei-externals.t | 3 +- t/lei-q-kw.t | 59 ++++++++++++++++-- t/lei-q-remote-import.t | 4 +- t/lei-q-thread.t | 7 ++- t/lei_to_mail.t | 2 +- t/lei_xsearch.t | 22 ++++++- 21 files changed, 355 insertions(+), 125 deletions(-) create mode 100644 lib/PublicInbox/LeiALE.pm
This will be used for keyword (and label) storage for externals. We'll be using this to ensure we don't redundantly auto-import messages into lei/store if they're already in a local external (they can still be imported explicitly via "lei import"). --- MANIFEST | 1 + lib/PublicInbox/LEI.pm | 16 ++++++ lib/PublicInbox/LeiALE.pm | 98 ++++++++++++++++++++++++++++++++++ lib/PublicInbox/LeiExternal.pm | 6 +++ lib/PublicInbox/LeiOverview.pm | 3 +- lib/PublicInbox/LeiQuery.pm | 5 ++ lib/PublicInbox/LeiStore.pm | 5 +- lib/PublicInbox/LeiToMail.pm | 10 ++-- lib/PublicInbox/LeiXSearch.pm | 27 +--------- lib/PublicInbox/Lock.pm | 2 +- t/lei-externals.t | 3 +- t/lei_xsearch.t | 22 +++++++- 12 files changed, 158 insertions(+), 40 deletions(-) create mode 100644 lib/PublicInbox/LeiALE.pm diff --git a/MANIFEST b/MANIFEST index 775de5cd..b6b4a3ab 100644 --- a/MANIFEST +++ b/MANIFEST @@ -179,6 +179,7 @@ lib/PublicInbox/InputPipe.pm lib/PublicInbox/Isearch.pm lib/PublicInbox/KQNotify.pm lib/PublicInbox/LEI.pm +lib/PublicInbox/LeiALE.pm lib/PublicInbox/LeiAuth.pm lib/PublicInbox/LeiConvert.pm lib/PublicInbox/LeiCurl.pm diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index d20ba744..0da26a32 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -97,6 +97,22 @@ sub _config_path ($) { .'/lei/config'); } +sub cache_dir ($) { + my ($self) = @_; + rel2abs($self, ($self->{env}->{XDG_CACHE_HOME} // + ($self->{env}->{HOME} // '/nonexistent').'/.cache') + .'/lei'); +} + +sub ale { + my ($self) = @_; + $self->{ale} //= do { + require PublicInbox::LeiALE; + PublicInbox::LeiALE->new(cache_dir($self). + '/all_locals_ever.git'); + }; +} + sub index_opt { # TODO: drop underscore variants everywhere, they're undocumented qw(fsync|sync! 
jobs|j=i indexlevel|L=s compact diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm new file mode 100644 index 00000000..bdb50a1a --- /dev/null +++ b/lib/PublicInbox/LeiALE.pm @@ -0,0 +1,98 @@ +# Copyright (C) 2021 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> + +# All Locals Ever: track lei/store + externals ever used as +# long as they're on an accessible FS. Includes "lei q" --include +# and --only targets that haven't been through "lei add-external". +# Typically: ~/.cache/lei/all_locals_ever.git +package PublicInbox::LeiALE; +use strict; +use v5.10.1; +use parent qw(PublicInbox::LeiSearch PublicInbox::Lock); +use PublicInbox::Git; +use PublicInbox::Import; +use Fcntl qw(SEEK_SET); + +sub new { + my ($cls, $d) = @_; + PublicInbox::Import::init_bare($d, 'ale'); + bless { + git => PublicInbox::Git->new($d), + lock_path => "$d/lei_ale.state", # dual-duty lock + state + ibxish => [], # Inbox and ExtSearch (and LeiSearch) objects + }, $cls; +} + +sub over {} # undef for xoids_for + +sub overs_all { # for xoids_for (called only in lei workers?) 
+ my ($self) = @_; + my $pid = $$; + if (($self->{owner_pid} // $pid) != $pid) { + delete($_->{over}) for @{$self->{ibxish}}; + } + $self->{owner_pid} = $pid; + grep(defined, map { $_->over } @{$self->{ibxish}}); +} + +sub refresh_externals { + my ($self, $lxs) = @_; + $self->git->cleanup; + my $lk = $self->lock_for_scope; + my $cur_lxs = ref($lxs)->new; + my $orig = do { + local $/; + readline($self->{lockfh}) // + die "readline($self->{lock_path}): $!"; + }; + my $new = ''; + my $old = ''; + my $gone = 0; + my %seen_ibxish; # $dir => any-defined value + for my $dir (split(/\n/, $orig)) { + if (-d $dir && -r _ && $cur_lxs->prepare_external($dir)) { + $seen_ibxish{$dir} //= length($old .= "$dir\n"); + } else { + ++$gone; + } + } + my @ibxish = $cur_lxs->locals; + for my $x ($lxs->locals) { + my $d = File::Spec->canonpath($x->{inboxdir} // $x->{topdir}); + $seen_ibxish{$d} //= do { + $new .= "$d\n"; + push @ibxish, $x; + }; + } + if ($new ne '' || $gone) { + $self->{lockfh}->autoflush(1); + if ($gone) { + seek($self->{lockfh}, 0, SEEK_SET) or die "seek: $!"; + truncate($self->{lockfh}, 0) or die "truncate: $!"; + } else { + $old = ''; + } + print { $self->{lockfh} } $old, $new or die "print: $!"; + } + $new = $old = ''; + my $f = $self->git->{git_dir}.'/objects/info/alternates'; + if (open my $fh, '<', $f) { + local $/; + $old = <$fh> // die "readline($f): $!"; + } + for my $x (@ibxish) { + $new .= File::Spec->canonpath($x->git->{git_dir})."/objects\n"; + } + $self->{ibxish} = \@ibxish; + return if $old eq $new; + + # this needs to be atomic since child processes may start + # git-cat-file at any time + my $tmp = "$f.$$.tmp"; + open my $fh, '>', $tmp or die "open($tmp): $!"; + print $fh $new or die "print($tmp): $!"; + close $fh or die "close($tmp): $!"; + rename($tmp, $f) or die "rename($tmp, $f): $!"; +} + +1; diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index b5dd85e1..aa09be9e 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ 
b/lib/PublicInbox/LeiExternal.pm @@ -139,6 +139,12 @@ sub add_external_finish { my $key = "external.$location.boost"; my $cur_boost = $cfg->{$key}; return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent + if (-d $location) { + require PublicInbox::LeiXSearch; + my $lxs = PublicInbox::LeiXSearch->new; + $lxs->prepare_external($location); + $self->ale->refresh_externals($lxs); + } $self->lei_config($key, $new_boost); } diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index f6348162..1036f465 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -209,11 +209,10 @@ sub ovv_each_smsg_cb { # runs in wq worker usually $wcb->(undef, $smsg, $eml); }; } elsif ($l2m && $l2m->{-wq_s1}) { - my $git_dir = $ibxish->git->{git_dir}; sub { my ($smsg, $mitem) = @_; $smsg->{pct} = get_pct($mitem) if $mitem; - $l2m->wq_io_do('write_mail', [], $git_dir, $smsg); + $l2m->wq_io_do('write_mail', [], $smsg); } } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 532668ae..007e35fc 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -57,6 +57,10 @@ sub lei_q { } if ($opt->{'local'} //= scalar(@only) ? 
0 : 1) { $lxs->prepare_external($lse); + } else { + my $tmp = PublicInbox::LeiXSearch->new; + $tmp->prepare_external($lse); + $self->ale->refresh_externals($tmp); } if (@only) { for my $loc (@only) { @@ -90,6 +94,7 @@ sub lei_q { unless ($lxs->locals || $lxs->remotes) { return $self->fail('no local or remote inboxes to search'); } + $self->ale->refresh_externals($lxs); my ($xj, $mj) = split(/,/, $opt->{jobs} // ''); if (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) { return $self->fail("`$xj' search jobs must be >= 1"); diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 26f975c3..c1abc288 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -251,10 +251,11 @@ sub refresh_local_externals { for my $loc (@loc) { # locals only $lxs->prepare_external($loc) if -d $loc; } + $self->{lei}->ale->refresh_externals($lxs); + $lxs->{git} = $self->{lei}->ale->git; $self->{lxs_all_local} = $lxs; $self->{cur_cfg} = $cfg; } - ($lxs->{git_tmp} //= $lxs->git_tmp)->{git_dir}; } sub write_prepare { @@ -268,7 +269,7 @@ sub write_prepare { $self->ipc_worker_spawn('lei_store', $lei->oldset, { lei => $lei }); } - $lei->{all_ext_git_dir} = $self->ipc_do('refresh_local_externals'); + my $wait = $self->ipc_do('refresh_local_externals'); $lei->{sto} = $self; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 6f386b10..7e821646 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -11,7 +11,6 @@ use PublicInbox::Lock; use PublicInbox::ProcessPipe; use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::LeiDedupe; -use PublicInbox::Git; use PublicInbox::GitAsyncCat; use PublicInbox::PktOp qw(pkt_do); use Symbol qw(gensym); @@ -642,18 +641,15 @@ sub poke_dst { } sub write_mail { # via ->wq_io_do - my ($self, $git_dir, $smsg) = @_; - my $git = $self->{"$$\0$git_dir"} //= PublicInbox::Git->new($git_dir); - git_async_cat($git, $smsg->{blob}, \&git_to_mail, + my ($self, $smsg) 
= @_; + git_async_cat($self->{lei}->{ale}->git, $smsg->{blob}, \&git_to_mail, [$self->{wcb}, $smsg]); } sub wq_atexit_child { my ($self) = @_; delete $self->{wcb}; - for my $git (delete @$self{grep(/\A$$\0/, keys %$self)}) { - $git->async_wait_all; - } + $self->{lei}->{ale}->git->async_wait_all; $SIG{__WARN__} = 'DEFAULT'; } diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index d95a218e..1266b3b3 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -297,27 +297,7 @@ sub query_remote_mboxrd { $lei->{ovv}->ovv_atexit_child($lei); } -# called by LeiOverview::each_smsg_cb -sub git { $_[0]->{git_tmp} // die 'BUG: caller did not set {git_tmp}' } - -sub git_tmp ($) { - my ($self) = @_; - my (%seen, @dirs); - my $tmp = File::Temp->newdir("lei_xsearch_git.$$-XXXX", TMPDIR => 1); - for my $ibxish (locals($self)) { - my $d = File::Spec->canonpath($ibxish->git->{git_dir}); - $seen{$d} //= push @dirs, "$d/objects\n" - } - my $git_dir = $tmp->dirname; - PublicInbox::Import::init_bare($git_dir); - my $f = "$git_dir/objects/info/alternates"; - open my $alt, '>', $f or die "open($f): $!"; - print $alt @dirs or die "print $f: $!"; - close $alt or die "close $f: $!"; - my $git = PublicInbox::Git->new($git_dir); - $git->{-tmp} = $tmp; - $git; -} +sub git { $_[0]->{git} // die 'BUG: git uninitialized' } sub xsearch_done_wait { # dwaitpid callback my ($arg, $pid) = @_; @@ -460,11 +440,6 @@ sub do_query { # 1031: F_SETPIPE_SZ fcntl($lei->{startq}, 1031, 4096) if $^O eq 'linux'; } - if (!$lei->{opt}->{threads} && locals($self)) { # for query_mset - # lei->{git_tmp} is set for wq_wait_old so we don't - # delete until all lei2mail + lei_xsearch workers are reaped - $lei->{git_tmp} = $self->{git_tmp} = git_tmp($self); - } $self->wq_workers_start('lei_xsearch', undef, $lei->oldset, { lei => $lei }); my $op = delete $lei->{pkt_op_c}; diff --git a/lib/PublicInbox/Lock.pm b/lib/PublicInbox/Lock.pm index 76c3ffb2..0ee2a8bd 100644 --- 
a/lib/PublicInbox/Lock.pm +++ b/lib/PublicInbox/Lock.pm @@ -16,7 +16,7 @@ sub lock_acquire { my $lock_path = $self->{lock_path}; croak 'already locked '.($lock_path // '(undef)') if $self->{lockfh}; return unless defined($lock_path); - sysopen(my $lockfh, $lock_path, O_WRONLY|O_CREAT) or + sysopen(my $lockfh, $lock_path, O_RDWR|O_CREAT) or croak "failed to open $lock_path: $!\n"; flock($lockfh, LOCK_EX) or croak "lock $lock_path failed: $!\n"; $self->{lockfh} = $lockfh; diff --git a/t/lei-externals.t b/t/lei-externals.t index 1d2a9a16..2045691f 100644 --- a/t/lei-externals.t +++ b/t/lei-externals.t @@ -236,7 +236,8 @@ test_lei(sub { is(scalar(@s), 2, "2 results in mbox$sfx"); lei_ok('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent'); - is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)"); + is(grep(!/^#/, $lei_err), 0, "no errors on no results ($sfx)") + or diag $lei_err; my @s2 = grep(/^Subject:/, $cat->()); is_deeply(\@s2, \@s, diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t index f626c790..68211d18 100644 --- a/t/lei_xsearch.t +++ b/t/lei_xsearch.t @@ -10,6 +10,7 @@ require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::ExtSearchIdx; require_git 2.6; require_ok 'PublicInbox::LeiXSearch'; +require_ok 'PublicInbox::LeiALE'; my ($home, $for_destroy) = tmpdir(); my @ibx; for my $V (1..2) { @@ -75,7 +76,8 @@ is($lxs->over, undef, '->over fails'); my $v2ibx = create_inbox 'v2full', version => 2, sub { $_[0]->add(eml_load('t/plack-qp.eml')); }; - my $v1ibx = create_inbox 'v1medium', indexlevel => 'medium', sub { + my $v1ibx = create_inbox 'v1medium', indexlevel => 'medium', + tmpdir => "$home/v1tmp", sub { $_[0]->add(eml_load('t/utf8.eml')); }; $lxs->prepare_external($v1ibx); @@ -85,6 +87,24 @@ is($lxs->over, undef, '->over fails'); } my $mset = $lxs->mset('m:testmessage@example.com'); is($mset->size, 1, 'got m: match on medium+full XSearch mix'); + my $mitem = ($mset->items)[0]; + my $smsg = $lxs->smsg_for($mitem) or BAIL_OUT 'smsg_for broken'; + + 
my $ale = PublicInbox::LeiALE->new("$home/ale"); + $ale->refresh_externals($lxs); + my $exp = [ $smsg->{blob}, 'blob', -s 't/utf8.eml' ]; + is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, 'ale->git->check'); + + $lxs = PublicInbox::LeiXSearch->new; + $lxs->prepare_external($v2ibx); + $ale->refresh_externals($lxs); + is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, + 'ale->git->check remembered inactive external'); + + rename("$home/v1tmp", "$home/v1moved") or BAIL_OUT "rename: $!"; + $ale->refresh_externals($lxs); + is($ale->git->check($smsg->{blob}), undef, + 'missing after directory gone'); } done_testing;
"lei q" now preserves changes to per-message keywords across invocations when its --output (Maildir or mbox) is reused (with or without --augment). In the future, these changes will be monitored via inotify, EVFILT_VNODE or IMAP IDLE, too. Unfortunately, this currently prevents "lei import" from ever importing a message that's in an external. That will be fixed in a future change. --- lib/PublicInbox/LeiOverview.pm | 4 ++ lib/PublicInbox/LeiSearch.pm | 37 ++++++++++----- lib/PublicInbox/LeiStore.pm | 83 ++++++++++++++++++++-------------- lib/PublicInbox/LeiToMail.pm | 16 ++++++- lib/PublicInbox/LeiXSearch.pm | 17 +++---- lib/PublicInbox/Over.pm | 22 ++++++++- lib/PublicInbox/OverIdx.pm | 10 ---- lib/PublicInbox/SearchIdx.pm | 3 ++ t/eml.t | 2 + t/lei-convert.t | 3 +- t/lei-q-kw.t | 48 +++++++++++++++++++- t/lei-q-thread.t | 7 +-- 12 files changed, 177 insertions(+), 75 deletions(-) diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 1036f465..48237f8a 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -216,9 +216,11 @@ sub ovv_each_smsg_cb { # runs in wq worker usually } } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; + my $lse = $lei->{sto}->search; sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); + $lse->xsmsg_vmd($smsg); $smsg = _unbless_smsg($smsg, $mitem); $buf .= "{\n"; $buf .= join(",\n", map { @@ -238,9 +240,11 @@ sub ovv_each_smsg_cb { # runs in wq worker usually } } elsif ($json) { my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL + my $lse = $lei->{sto}->search; sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); + $lse->xsmsg_vmd($smsg); $buf .= $json->encode(_unbless_smsg(@_)) .
$ORS; return if length($buf) < 65536; my $lk = $self->lock_for_scope; diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 2e3f10fd..360a37e5 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -27,6 +27,20 @@ sub msg_keywords { wantarray ? sort(keys(%$kw)) : $kw; } +sub xsmsg_vmd { + my ($self, $smsg) = @_; + return if $smsg->{kw}; + my $xdb = $self->xdb; # set {nshard}; + my %kw; + $kw{flagged} = 1 if delete($smsg->{lei_q_tt_flagged}); + my @num = $self->over->blob_exists($smsg->{blob}); + for my $num (@num) { # there should only be one... + my $kw = xap_terms('K', $xdb, num2docid($self, $num)); + %kw = (%kw, %$kw); + } + $smsg->{kw} = [ sort keys %kw ] if scalar(keys(%kw)); +} + # when a message has no Message-IDs at all, this is needed for # unsent Draft messages, at least sub content_key ($) { @@ -43,41 +57,42 @@ sub content_key ($) { } sub _cmp_1st { # git->cat_async callback - my ($bref, $oid, $type, $size, $cmp) = @_; # cmp: [chash, found, smsg] - if (content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) { + my ($bref, $oid, $type, $size, $cmp) = @_; # cmp: [chash, xoids, smsg] + if ($bref && content_hash(PublicInbox::Eml->new($bref)) eq $cmp->[0]) { $cmp->[1]->{$oid} = $cmp->[2]->{num}; } } -sub xids_for { # returns { OID => docid } mapping for $eml matches +sub xoids_for { # returns { OID => docid } mapping for $eml matches my ($self, $eml, $min) = @_; my ($chash, $mids) = content_key($eml); my @overs = ($self->over // $self->overs_all); my $git = $self->git; - my $found = {}; + my $xoids = {}; for my $mid (@$mids) { for my $o (@overs) { my ($id, $prev); while (my $cur = $o->next_by_mid($mid, \$id, \$prev)) { - next if $found->{$cur->{blob}}; + next if $cur->{bytes} == 0 || + $xoids->{$cur->{blob}}; $git->cat_async($cur->{blob}, \&_cmp_1st, - [ $chash, $found, $cur ]); - if ($min && scalar(keys %$found) >= $min) { + [ $chash, $xoids, $cur ]); + if ($min && scalar(keys %$xoids) >= $min) { 
$git->cat_async_wait; - return $found; + return $xoids; } } } } $git->cat_async_wait; - scalar(keys %$found) ? $found : undef; + scalar(keys %$xoids) ? $xoids : undef; } # returns true if $eml is indexed by lei/store and keywords don't match sub kw_changed { my ($self, $eml, $new_kw_sorted) = @_; - my $found = xids_for($self, $eml, 1) // return; - my ($num) = values %$found; + my $xoids = xoids_for($self, $eml, 1) // return; + my ($num) = values %$xoids; my @cur_kw = msg_keywords($self, $num); join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 0 : 1; } diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index c1abc288..c66d3dc2 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -114,6 +114,7 @@ sub _docids_for ($$) { for my $mid (@$mids) { my ($id, $prev); while (my $cur = $oidx->next_by_mid($mid, \$id, \$prev)) { + next if $cur->{bytes} == 0; # external-only message my $oid = $cur->{blob}; my $docid = $cur->{num}; my $bref = $im ? $im->cat_blob($oid) : undef; @@ -163,7 +164,7 @@ sub add_eml { my ($self, $eml, $vmd) = @_; my $im = $self->importer; # may create new epoch my $eidx = eidx_init($self); # writes ALL.git/objects/info/alternates - my $oidx = $eidx->{oidx}; + my $oidx = $eidx->{oidx}; # PublicInbox::Import::add checks this my $smsg = bless { -oidx => $oidx }, 'PublicInbox::Smsg'; $im->add($eml, undef, $smsg) or return; # duplicate returns undef @@ -193,22 +194,54 @@ sub set_eml { add_eml($self, $eml, $vmd) // set_eml_vmd($self, $eml, $vmd); } -sub add_eml_maybe { - my ($self, $eml) = @_; - my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; - return if $lxs->xids_for($eml, 1); - add_eml($self, $eml); -} - # set or update keywords for external message, called via ipc_do -sub set_xkw { - my ($self, $eml, $kw) = @_; - my $lxs = $self->{lxs_all_local} // die 'BUG: no {lxs_all_local}'; - if ($lxs->xids_for($eml, 1)) { # is it in a local external? 
- # TODO: index keywords only - } else { - set_eml($self, $eml, { kw => $kw }); +sub set_xvmd { + my ($self, $xoids, $eml, $vmd) = @_; + + my $eidx = eidx_init($self); + my $oidx = $eidx->{oidx}; + + # see if we can just update existing docs + for my $oid (keys %$xoids) { + my @docids = $oidx->blob_exists($oid) or next; + scalar(@docids) > 1 and + warn "W: $oid indexed as multiple docids: @docids\n"; + for my $docid (@docids) { + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('set_vmd', $docid, $vmd); + } + delete $xoids->{$oid}; # all done with this oid } + return unless scalar(keys(%$xoids)); + + # see if it was indexed, but with different OID(s) + if (my @docids = _docids_for($self, $eml)) { + for my $docid (@docids) { + for my $oid (keys %$xoids) { + $oidx->add_xref3($docid, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($docid); + $idx->ipc_do('set_vmd', $docid, $vmd); + } + return; + } + # totally unseen + my $smsg = bless { blob => '' }, 'PublicInbox::Smsg'; + $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); + # save space for an externals-only message + my $hdr = $eml->header_obj; + $smsg->populate($hdr); # sets lines == 0 + $smsg->{bytes} = 0; + delete @$smsg{qw(From Subject)}; + $smsg->{to} = $smsg->{cc} = $smsg->{from} = ''; + $oidx->add_overview($hdr, $smsg); # subject+references for threading + $smsg->{subject} = ''; + for my $oid (keys %$xoids) { + $oidx->add_xref3($smsg->{num}, -1, $oid, '.'); + } + my $idx = $eidx->idx_shard($smsg->{num}); + $idx->index_eml(PublicInbox::Eml->new("\n\n"), $smsg); + $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); } sub checkpoint { @@ -240,28 +273,9 @@ sub ipc_atfork_child { $self->SUPER::ipc_atfork_child; } -sub refresh_local_externals { - my ($self) = @_; - my $cfg = $self->{lei}->_lei_cfg or return; - my $cur_cfg = $self->{cur_cfg} // -1; - my $lxs = $self->{lxs_all_local}; - if ($cfg != $cur_cfg || !$lxs) { - $lxs = PublicInbox::LeiXSearch->new; - my @loc = $self->{lei}->externals_each; - for my $loc 
(@loc) { # locals only - $lxs->prepare_external($loc) if -d $loc; - } - $self->{lei}->ale->refresh_externals($lxs); - $lxs->{git} = $self->{lei}->ale->git; - $self->{lxs_all_local} = $lxs; - $self->{cur_cfg} = $cfg; - } -} - sub write_prepare { my ($self, $lei) = @_; unless ($self->{-ipc_req}) { - require PublicInbox::LeiXSearch; $self->ipc_lock_init($lei->store_path . '/ipc.lock'); # Mail we import into lei are private, so headers filtered out # by -mda for public mail are not appropriate @@ -269,7 +283,6 @@ sub write_prepare { $self->ipc_worker_spawn('lei_store', $lei->oldset, { lei => $lei }); } - my $wait = $self->ipc_do('refresh_local_externals'); $lei->{sto} = $self; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 7e821646..3e6cf00c 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -11,6 +11,7 @@ use PublicInbox::Lock; use PublicInbox::ProcessPipe; use PublicInbox::Spawn qw(which spawn popen_rd); use PublicInbox::LeiDedupe; +use PublicInbox::Git; use PublicInbox::GitAsyncCat; use PublicInbox::PktOp qw(pkt_do); use Symbol qw(gensym); @@ -260,10 +261,12 @@ sub _mbox_write_cb ($$) { my $atomic_append = !defined($ovv->{lock_path}); my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe; + my $lse = $lei->{sto} ? 
$lei->{sto}->search : undef; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($buf); return if $dedupe->is_dup($eml, $smsg->{blob}); + $lse->xsmsg_vmd($smsg) if $lse; $buf = $eml2mbox->($eml, $smsg); return atomic_append($lei, $buf) if $atomic_append; my $lk = $ovv->lock_for_scope; @@ -275,10 +278,15 @@ sub update_kw_maybe ($$$$) { my ($lei, $lse, $eml, $kw) = @_; return unless $lse; my $x = $lse->kw_changed($eml, $kw); + my $vmd = { kw => $kw }; if ($x) { - $lei->{sto}->ipc_do('set_eml', $eml, { kw => $kw }); + $lei->{sto}->ipc_do('set_eml', $eml, $vmd); } elsif (!defined($x)) { - $lei->{sto}->ipc_do('set_xkw', $eml, $kw); + if (my $xoids = $lei->{ale}->xoids_for($eml)) { + $lei->{sto}->ipc_do('set_xvmd', $xoids, $eml, $vmd); + } else { + $lei->{sto}->ipc_do('set_eml', $eml, $vmd); + } } } @@ -342,10 +350,12 @@ sub _maildir_write_cb ($$) { my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; + my $lse = $lei->{sto} ? $lei->{sto}->search : undef; sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run $buf //= \($eml->as_string); + $lse->xsmsg_vmd($smsg) if $lse; return _buf2maildir($dst, $buf, $smsg) if !$dedupe; $eml //= PublicInbox::Eml->new($$buf); # copy buf return if $dedupe->is_dup($eml, $smsg->{blob}); @@ -361,6 +371,7 @@ sub _imap_write_cb ($$) { my $imap_append = $lei->{net}->can('imap_append'); my $mic = $lei->{net}->mic_get($self->{uri}); my $folder = $self->{uri}->mailbox; + my $lse = $lei->{sto} ? 
$lei->{sto}->search : undef; sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $mic // return $lei->fail; # dst may be undef-ed in last run @@ -368,6 +379,7 @@ sub _imap_write_cb ($$) { $eml //= PublicInbox::Eml->new($$bref); # copy bref return if $dedupe->is_dup($eml, $smsg->{blob}); } + $lse->xsmsg_vmd($smsg) if $lse; eval { $imap_append->($mic, $folder, $bref, $smsg, $eml) }; if (my $err = $@) { undef $mic; diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index 1266b3b3..57717b87 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -83,6 +83,7 @@ sub smsg_for { my $num = int(($docid - 1) / $nshard) + 1; my $ibx = $self->{shard2ibx}->[$shard]; my $smsg = $ibx->over->get_art($num); + return if $smsg->{bytes} == 0; mitem_kw($smsg, $mitem) if $ibx->can('msg_keywords'); $smsg->{docid} = $docid; $smsg; @@ -97,11 +98,6 @@ sub recent { sub over {} -sub overs_all { # for xids_for - my ($self) = @_; - grep(defined, map { $_->over } locals($self)) -} - sub _mset_more ($$) { my ($mset, $mo) = @_; my $size = $mset->size; @@ -153,7 +149,7 @@ sub query_thread_mset { # for --threads my $mset; my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei, $ibxish); my $can_kw = !!$ibxish->can('msg_keywords'); - my $fl = $lei->{opt}->{threads} > 1 ? [ 'flagged' ] : undef; + my $fl = $lei->{opt}->{threads} > 1 ? 
1 : undef; do { $mset = $srch->mset($mo->{qstr}, $mo); mset_progress($lei, $desc, $mset->size, @@ -165,13 +161,14 @@ sub query_thread_mset { # for --threads while ($over->expand_thread($ctx)) { for my $n (@{$ctx->{xids}}) { my $smsg = $over->get_art($n) or next; - wait_startq($lei); my $mitem = delete $n2item{$smsg->{num}}; + next if $smsg->{bytes} == 0; + wait_startq($lei); # wait for keyword updates if ($mitem) { if ($can_kw) { mitem_kw($smsg, $mitem, $fl); } elsif ($fl) { - $smsg->{kw} = $fl; + $smsg->{lei_q_tt_flagged} = 1; } } $each_smsg->($smsg, $mitem); @@ -209,8 +206,8 @@ sub query_mset { # non-parallel for non-"--threads" users sub each_remote_eml { # callback for MboxReader->mboxrd my ($eml, $self, $lei, $each_smsg) = @_; - if (my $sto = $self->{import_sto}) { - $sto->ipc_do('add_eml_maybe', $eml); + if ($self->{import_sto} && !$lei->{ale}->xoids_for($eml, 1)) { + $self->{import_sto}->ipc_do('add_eml', $eml); } my $smsg = bless {}, 'PublicInbox::Smsg'; $smsg->populate($eml); diff --git a/lib/PublicInbox/Over.pm b/lib/PublicInbox/Over.pm index 06ea439d..587e0516 100644 --- a/lib/PublicInbox/Over.pm +++ b/lib/PublicInbox/Over.pm @@ -7,7 +7,7 @@ package PublicInbox::Over; use strict; use v5.10.1; -use DBI; +use DBI qw(:sql_types); # SQL_BLOB use DBD::SQLite; use PublicInbox::Smsg; use Compress::Zlib qw(uncompress); @@ -349,4 +349,24 @@ sub check_inodes { } } +sub blob_exists { + my ($self, $oidhex) = @_; + if (wantarray) { + my $sth = $self->dbh->prepare_cached(<<'', undef, 1); +SELECT docid FROM xref3 WHERE oidbin = ? + + $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + my $tmp = $sth->fetchall_arrayref; + map { $_->[0] } @$tmp; + } else { + my $sth = $self->dbh->prepare_cached(<<'', undef, 1); +SELECT COUNT(*) FROM xref3 WHERE oidbin = ? 
+ + $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); + $sth->execute; + $sth->fetchrow_array; + } +} + 1; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 9013ae23..e1cd31b9 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -668,14 +668,4 @@ DELETE FROM eidxq WHERE docid = ? } -sub blob_exists { - my ($self, $oidhex) = @_; - my $sth = $self->dbh->prepare_cached(<<'', undef, 1); -SELECT COUNT(*) FROM xref3 WHERE oidbin = ? - - $sth->bind_param(1, pack('H*', $oidhex), SQL_BLOB); - $sth->execute; - $sth->fetchrow_array; -} - 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index e2a1a678..3237aadc 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -494,7 +494,10 @@ sub add_eidx_info { begin_txn_lazy($self); my $doc = _get_doc($self, $docid) or return; term_generator($self)->set_document($doc); + + # '.' is special for lei_store $doc->add_boolean_term('O'.$eidx_key) if $eidx_key ne '.'; + index_list_id($self, $doc, $eml); $self->{xdb}->replace_document($docid, $doc); } diff --git a/t/eml.t b/t/eml.t index ebd45c13..0cf48f22 100644 --- a/t/eml.t +++ b/t/eml.t @@ -26,6 +26,8 @@ sub mime_load ($) { is($str, "hi\n", '->new modified body like Email::Simple'); is($eml->body, "hi\n", '->body works'); is($eml->as_string, "a: b\n\nhi\n", '->as_string'); + my $empty = PublicInbox::Eml->new("\n\n"); + is($empty->as_string, "\n\n", 'empty message'); } for my $cls (@classes) { diff --git a/t/lei-convert.t b/t/lei-convert.t index 186cfb13..e147715d 100644 --- a/t/lei-convert.t +++ b/t/lei-convert.t @@ -60,7 +60,8 @@ test_lei({ tmpdir => $tmpdir }, sub { PublicInbox::MdirReader::maildir_each_eml("$d/md", sub { push @md, $_[2]; }); - is(scalar(@md), scalar(@mboxrd), 'got expected emails in Maildir'); + is(scalar(@md), scalar(@mboxrd), 'got expected emails in Maildir') or + diag $lei_err; @md = sort { ${$a->{bdy}} cmp ${$b->{bdy}} } @md; @mboxrd = sort { ${$a->{bdy}} cmp 
${$b->{bdy}} } @mboxrd; my @rd_nostatus = map { diff --git a/t/lei-q-kw.t b/t/lei-q-kw.t index 917a2c53..e7e14221 100644 --- a/t/lei-q-kw.t +++ b/t/lei-q-kw.t @@ -112,7 +112,51 @@ for my $sfx ('', '.gz') { lei_ok(qw(q -o), "mboxrd:/dev/stdout", qw(m:qp@example.com)) or diag $lei_err; like($lei_out, qr/^Status: OR\n/sm, 'Status set by previous augment'); -} +} # /mbox + mbox.gz tests -}); +my ($ro_home, $cfg_path) = setup_public_inboxes; + +# import keywords-only for external messages: +$o = "$ENV{HOME}/kwdir"; +my $m = 'alpine.DEB.2.20.1608131214070.4924@example'; +my @inc = ('-I', "$ro_home/t1"); +lei_ok(qw(q -o), $o, "m:$m", @inc); + +# emulate MUA marking a Maildir message as read: +@fn = glob("$o/cur/*"); +scalar(@fn) == 1 or BAIL_OUT "wrote multiple or zero files: ".explain(\@fn); +rename($fn[0], "$fn[0]S") or BAIL_OUT "rename $!"; + +lei_ok(qw(q -o), $o, 'bogus', \'clobber output dir to import keywords'); +@fn = glob("$o/cur/*"); +is_deeply(\@fn, [], 'output dir actually clobbered'); +lei_ok('q', "m:$m", @inc); +my $res = json_utf8->decode($lei_out); +is_deeply($res->[0]->{kw}, ['seen'], 'seen flag set for external message') + or diag explain($res); +lei_ok('q', "m:$m", '--no-external'); +is_deeply($res = json_utf8->decode($lei_out), [ undef ], + 'external message not imported') or diag explain($res); + +$o = "$ENV{HOME}/kwmboxrd"; +lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); + +# emulate MUA marking mboxrd message as unread +open my $fh, '<', $o or BAIL_OUT; +my $s = do { local $/; <$fh> }; +$s =~ s/^Status: OR\n/Status: O\nX-Status: A\n/sm or + fail "failed to clear R flag in $s"; +open $fh, '>', $o or BAIL_OUT; +print $fh $s or BAIL_OUT; +close $fh or BAIL_OUT; + +lei_ok(qw(q -o), "mboxrd:$o", 'm:bogus', @inc, + \'clobber mbox to import keywords'); +lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); +open $fh, '<', $o or BAIL_OUT; +$s = do { local $/; <$fh> }; +like($s, qr/^Status: O\n/ms, 'seen keyword gone in mbox'); +like($s, qr/^X-Status: A\n/ms, 'answered 
flag set'); + +}); # test_lei done_testing; diff --git a/t/lei-q-thread.t b/t/lei-q-thread.t index e24fb2cb..c999d12b 100644 --- a/t/lei-q-thread.t +++ b/t/lei-q-thread.t @@ -43,10 +43,11 @@ test_lei(sub { 'flagged set in direct hit'); lei_ok qw(q -tt m:testmessage@example.com --only), "$ro_home/t2"; $res = json_utf8->decode($lei_out); - is_deeply($res->[0]->{kw}, [ 'flagged' ], - 'flagged set on external with -tt'); + is_deeply($res->[0]->{kw}, [ qw(flagged seen) ], + 'flagged set on external with -tt') or diag explain($res); lei_ok qw(q -t m:testmessage@example.com --only), "$ro_home/t2"; $res = json_utf8->decode($lei_out); - ok(!exists($res->[0]->{kw}), 'flagged not set on external with 1 -t'); + is_deeply($res->[0]->{kw}, [ 'seen' ], + 'flagged not set on external with 1 -t') or diag explain($res); }); done_testing;
Don't waste precious terminal space when there are only a small number of possible keywords supported/reserved for JMAP. In the future, we may implement more sophisticated wrapping for labels, but we'll cross that bridge when we come to it. --- lib/PublicInbox/LeiOverview.pm | 5 ++++- t/lei-q-kw.t | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index 48237f8a..521bca50 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -176,7 +176,10 @@ sub _json_pretty { $pair =~ s/(null|"),"/$1, "/g; $pair; } @$v) . ']'; - } else { # references + } elsif ($k eq 'kw') { # keywords are short, one-line + $v = $json->encode($v); + $v =~ s/","/", "/g; + } else { # refs, labels, ... $v = '[' . join($sep, map { substr($json->encode([$_]), 1, -1); } @$v) . ']'; diff --git a/t/lei-q-kw.t b/t/lei-q-kw.t index e7e14221..de2c775a 100644 --- a/t/lei-q-kw.t +++ b/t/lei-q-kw.t @@ -144,7 +144,7 @@ lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); # emulate MUA marking mboxrd message as unread open my $fh, '<', $o or BAIL_OUT; my $s = do { local $/; <$fh> }; -$s =~ s/^Status: OR\n/Status: O\nX-Status: A\n/sm or +$s =~ s/^Status: OR\n/Status: O\nX-Status: AF\n/sm or fail "failed to clear R flag in $s"; open $fh, '>', $o or BAIL_OUT; print $fh $s or BAIL_OUT; @@ -156,7 +156,10 @@ lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); open $fh, '<', $o or BAIL_OUT; $s = do { local $/; <$fh> }; like($s, qr/^Status: O\n/ms, 'seen keyword gone in mbox'); -like($s, qr/^X-Status: A\n/ms, 'answered flag set'); +like($s, qr/^X-Status: AF\n/ms, 'answered + flagged set'); +lei_ok(qw(q --pretty), "m:$m", @inc); +like($lei_out, qr/^ "kw": \["answered", "flagged"\],\n/sm, + '--pretty JSON output shows kw: on one line'); }); # test_lei done_testing;
These changes may make it easier to do byte-for-byte comparisons with mail copied out of mutt, a popular MUA for our target audience. mutt currently outputs the 'R' (seen) flag before the 'O' character in the Status: header. We'll assume that stays the case (it has been for a while). Status now comes before X-Status, also matching mutt behavior. --- lib/PublicInbox/LeiToMail.pm | 14 ++++++++------ t/lei-q-kw.t | 14 +++++++------- t/lei-q-remote-import.t | 4 ++-- t/lei_to_mail.t | 2 +- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 3e6cf00c..e9ab939c 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -42,10 +42,7 @@ sub _mbox_hdr_buf ($$$) { my ($eml, $type, $smsg) = @_; $eml->header_set($_) for (qw(Lines Bytes Content-Length)); - # Messages are always 'O' (non-\Recent in IMAP), it saves - # MUAs the trouble of rewriting the mbox if no other - # changes are made - my %hdr = (Status => [ 'O' ]); # set Status, X-Status + my %hdr = (Status => []); # set Status, X-Status for my $k (@{$smsg->{kw} // []}) { if (my $ent = $kw2status{$k}) { push @{$hdr{$ent->[0]}}, $ent->[1]; @@ -53,8 +50,13 @@ sub _mbox_hdr_buf ($$$) { warn "TODO: keyword `$k' not supported for mbox\n"; } } - while (my ($name, $chars) = each %hdr) { - $eml->header_set($name, join('', sort @$chars)); + # Messages are always 'O' (non-\Recent in IMAP), it saves + # MUAs the trouble of rewriting the mbox if no other + # changes are made. We put 'O' at the end (e.g. "Status: RO") + # to match mutt(1) output. + $eml->header_set('Status', join('', sort(@{$hdr{Status}})). 
'O'); + if (my $chars = delete $hdr{'X-Status'}) { + $eml->header_set('X-Status', join('', sort(@$chars))); } my $buf = delete $eml->{hdr}; diff --git a/t/lei-q-kw.t b/t/lei-q-kw.t index de2c775a..b5e22e9b 100644 --- a/t/lei-q-kw.t +++ b/t/lei-q-kw.t @@ -13,7 +13,7 @@ my $exp = { '<qp@example.com>' => eml_load('t/plack-qp.eml'), '<testmessage@example.com>' => eml_load('t/utf8.eml'), }; -$exp->{'<qp@example.com>'}->header_set('Status', 'OR'); +$exp->{'<qp@example.com>'}->header_set('Status', 'RO'); $exp->{'<testmessage@example.com>'}->header_set('Status', 'O'); test_lei(sub { @@ -57,7 +57,7 @@ SKIP: { open my $fh, '<', \$buf or BAIL_OUT $!; PublicInbox::MboxReader->mboxrd($fh, sub { my ($eml) = @_; - $eml->header_set('Status', 'OR'); + $eml->header_set('Status', 'RO'); is_deeply($eml, $exp->{'<qp@example.com>'}, 'FIFO output works as expected'); }); @@ -96,7 +96,7 @@ for my $sfx ('', '.gz') { is($buf, '', 'emptied'); lei_ok(qw(q -o), "mboxrd:$o", qw(m:qp@example.com)); $buf = $read_file->($o); - $buf =~ s/\nStatus: O\n\n/\nStatus: OR\n\n/s or + $buf =~ s/\nStatus: O\n\n/\nStatus: RO\n\n/s or BAIL_OUT "no Status in $buf"; $write_file->($o, $buf); lei_ok(qw(q -a -o), "mboxrd:$o", qw(m:testmessage@example.com)); @@ -111,7 +111,7 @@ for my $sfx ('', '.gz') { lei_ok(qw(q -o), "mboxrd:/dev/stdout", qw(m:qp@example.com)) or diag $lei_err; - like($lei_out, qr/^Status: OR\n/sm, 'Status set by previous augment'); + like($lei_out, qr/^Status: RO\n/sm, 'Status set by previous augment'); } # /mbox + mbox.gz tests my ($ro_home, $cfg_path) = setup_public_inboxes; @@ -144,7 +144,7 @@ lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); # emulate MUA marking mboxrd message as unread open my $fh, '<', $o or BAIL_OUT; my $s = do { local $/; <$fh> }; -$s =~ s/^Status: OR\n/Status: O\nX-Status: AF\n/sm or +$s =~ s/^Status: RO\n/Status: O\nX-Status: AF\n/sm or fail "failed to clear R flag in $s"; open $fh, '>', $o or BAIL_OUT; print $fh $s or BAIL_OUT; @@ -155,8 +155,8 @@ lei_ok(qw(q -o), 
"mboxrd:$o", 'm:bogus', @inc, lei_ok(qw(q -o), "mboxrd:$o", "m:$m", @inc); open $fh, '<', $o or BAIL_OUT; $s = do { local $/; <$fh> }; -like($s, qr/^Status: O\n/ms, 'seen keyword gone in mbox'); -like($s, qr/^X-Status: AF\n/ms, 'answered + flagged set'); +like($s, qr/^Status: O\nX-Status: AF\n/ms, + 'seen keyword gone in mbox, answered + flagged set'); lei_ok(qw(q --pretty), "m:$m", @inc); like($lei_out, qr/^ "kw": \["answered", "flagged"\],\n/sm, diff --git a/t/lei-q-remote-import.t b/t/lei-q-remote-import.t index 2293489a..25e461ac 100644 --- a/t/lei-q-remote-import.t +++ b/t/lei-q-remote-import.t @@ -80,7 +80,7 @@ From a@z Mon Sep 17 00:00:00 2001 From: nobody@localhost Date: Sat, 13 Mar 2021 18:23:01 +0600 Message-ID: <never-before-seen@example.com> -Status: RO +Status: OR whatever EOF @@ -89,7 +89,7 @@ EOF is_deeply($slurp_emls->($o), [$exp], 'got expected result after clobber') or diag $lei_err; lei_ok(qw(q -o mboxrd:/dev/stdout m:never-before-seen@example.com)); - like($lei_out, qr/seen\@example\.com>\nStatus: OR\n\nwhatever/sm, + like($lei_out, qr/seen\@example\.com>\nStatus: RO\n\nwhatever/sm, '--import-before imported totally unseen message'); }); done_testing; diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t index 585db689..626bdab3 100644 --- a/t/lei_to_mail.t +++ b/t/lei_to_mail.t @@ -28,7 +28,7 @@ for my $mbox (@MBOX) { my $s = $cb->(PublicInbox::Eml->new($from), $smsg); is(substr($$s, -1, 1), "\n", "trailing LF in normal $mbox"); my $eml = PublicInbox::Eml->new($s); - is($eml->header('Status'), 'OR', "Status: set by $m"); + is($eml->header('Status'), 'RO', "Status: set by $m"); is($eml->header('X-Status'), 'AF', "X-Status: set by $m"); if ($mbox eq 'mboxcl2') { like($eml->body_raw, qr/^From /, "From not escaped $m");
This should make a future change to "lei import" work more nicely, since we'll be needing ALE to vivify external-only messages upon explicit "lei import". --- lib/PublicInbox/LEI.pm | 3 +-- lib/PublicInbox/LeiALE.pm | 19 ++++++++++++++++--- lib/PublicInbox/LeiExternal.pm | 6 ------ lib/PublicInbox/LeiQuery.pm | 4 ---- t/lei_xsearch.t | 2 +- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm index 0da26a32..72a0e52c 100644 --- a/lib/PublicInbox/LEI.pm +++ b/lib/PublicInbox/LEI.pm @@ -108,8 +108,7 @@ sub ale { my ($self) = @_; $self->{ale} //= do { require PublicInbox::LeiALE; - PublicInbox::LeiALE->new(cache_dir($self). - '/all_locals_ever.git'); + $self->_lei_cfg(1)->{ale} //= PublicInbox::LeiALE->new($self); }; } diff --git a/lib/PublicInbox/LeiALE.pm b/lib/PublicInbox/LeiALE.pm index bdb50a1a..45748435 100644 --- a/lib/PublicInbox/LeiALE.pm +++ b/lib/PublicInbox/LeiALE.pm @@ -11,16 +11,29 @@ use v5.10.1; use parent qw(PublicInbox::LeiSearch PublicInbox::Lock); use PublicInbox::Git; use PublicInbox::Import; +use PublicInbox::LeiXSearch; use Fcntl qw(SEEK_SET); -sub new { - my ($cls, $d) = @_; +sub _new { + my ($d) = @_; PublicInbox::Import::init_bare($d, 'ale'); bless { git => PublicInbox::Git->new($d), lock_path => "$d/lei_ale.state", # dual-duty lock + state ibxish => [], # Inbox and ExtSearch (and LeiSearch) objects - }, $cls; + }, __PACKAGE__ +} + +sub new { + my ($self, $lei) = @_; + ref($self) or $self = _new($lei->cache_dir . 
'/all_locals_ever.git'); + my $lxs = PublicInbox::LeiXSearch->new; + $lxs->prepare_external($lei->_lei_store(1)->search); + for my $loc ($lei->externals_each) { # locals only + $lxs->prepare_external($loc) if -d $loc; + } + $self->refresh_externals($lxs); + $self; } sub over {} # undef for xoids_for diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm index aa09be9e..b5dd85e1 100644 --- a/lib/PublicInbox/LeiExternal.pm +++ b/lib/PublicInbox/LeiExternal.pm @@ -139,12 +139,6 @@ sub add_external_finish { my $key = "external.$location.boost"; my $cur_boost = $cfg->{$key}; return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent - if (-d $location) { - require PublicInbox::LeiXSearch; - my $lxs = PublicInbox::LeiXSearch->new; - $lxs->prepare_external($location); - $self->ale->refresh_externals($lxs); - } $self->lei_config($key, $new_boost); } diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 007e35fc..148e8524 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -57,10 +57,6 @@ sub lei_q { } if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($lse); - } else { - my $tmp = PublicInbox::LeiXSearch->new; - $tmp->prepare_external($lse); - $self->ale->refresh_externals($tmp); } if (@only) { for my $loc (@only) { diff --git a/t/lei_xsearch.t b/t/lei_xsearch.t index 68211d18..e56b2820 100644 --- a/t/lei_xsearch.t +++ b/t/lei_xsearch.t @@ -90,7 +90,7 @@ is($lxs->over, undef, '->over fails'); my $mitem = ($mset->items)[0]; my $smsg = $lxs->smsg_for($mitem) or BAIL_OUT 'smsg_for broken'; - my $ale = PublicInbox::LeiALE->new("$home/ale"); + my $ale = PublicInbox::LeiALE::_new("$home/ale"); $ale->refresh_externals($lxs); my $exp = [ $smsg->{blob}, 'blob', -s 't/utf8.eml' ]; is_deeply([ $ale->git->check($smsg->{blob}) ], $exp, 'ale->git->check');