Some things that came up while working on making repeated/saved searches faster... Eric Wong (5): lei_query: remove unnecessary V2Writable require lei q: reduce lei/store work for kw changes to stored mail lei_store: set_xvmd: don't add if no vmd at all lei_store: quiet down git user info being unset lei_store: quiet down per-message related warnings lib/PublicInbox/LeiQuery.pm | 1 - lib/PublicInbox/LeiSearch.pm | 9 +++++---- lib/PublicInbox/LeiStore.pm | 33 ++++++++++++++------------------- lib/PublicInbox/LeiToMail.pm | 6 +++--- 4 files changed, 22 insertions(+), 27 deletions(-)
AFAIK, PublicInbox::V2Writable was only required here for nproc detection, and nproc detection is handled by PublicInbox::IPC nowadays. --- lib/PublicInbox/LeiQuery.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 3a437bf0..9174bea8 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -86,7 +86,6 @@ sub lxs_prepare { sub lei_q { my ($self, @argv) = @_; require PublicInbox::LeiOverview; - require PublicInbox::V2Writable; PublicInbox::Config->json; # preload before forking PublicInbox::LeiOverview->new($self) or return; my $lxs = lxs_prepare($self) or return;
We can tweak lse->kw_changed to hand the matching docids back to the caller, which reduces IPC traffic and the work the lei/store worker needs to do. --- lib/PublicInbox/LeiSearch.pm | 9 +++++---- lib/PublicInbox/LeiStore.pm | 8 ++++---- lib/PublicInbox/LeiToMail.pm | 6 +++--- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/PublicInbox/LeiSearch.pm b/lib/PublicInbox/LeiSearch.pm index 07d570ec..69ba8303 100644 --- a/lib/PublicInbox/LeiSearch.pm +++ b/lib/PublicInbox/LeiSearch.pm @@ -100,10 +100,11 @@ sub xoids_for { # returns true if $eml is indexed by lei/store and keywords don't match sub kw_changed { - my ($self, $eml, $new_kw_sorted) = @_; - my $xoids = xoids_for($self, $eml, 1) // return; - my ($num) = values %$xoids; - my @cur_kw = msg_keywords($self, $num); + my ($self, $eml, $new_kw_sorted, $docids) = @_; + my $xoids = xoids_for($self, $eml) // return; + $docids //= []; + @$docids = sort { $a <=> $b } values %$xoids; + my @cur_kw = msg_keywords($self, $docids->[0]); join("\0", @$new_kw_sorted) eq join("\0", @cur_kw) ? 
0 : 1; } diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index b76af4d3..48ab1d76 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -131,13 +131,13 @@ sub _docids_for ($$) { } sub set_eml_vmd { - my ($self, $eml, $vmd) = @_; + my ($self, $eml, $vmd, $docids) = @_; my $eidx = eidx_init($self); - my @docids = _docids_for($self, $eml); - for my $docid (@docids) { + $docids //= [ _docids_for($self, $eml) ]; + for my $docid (@$docids) { $eidx->idx_shard($docid)->ipc_do('set_vmd', $docid, $vmd); } - \@docids; + $docids; } sub add_eml_vmd { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index da633da4..0364d8ef 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -228,10 +228,10 @@ sub _mbox_write_cb ($$) { sub update_kw_maybe ($$$$) { my ($lei, $lse, $eml, $kw) = @_; return unless $lse; - my $lse_oids = $lse->kw_changed($eml, $kw); + my $c = $lse->kw_changed($eml, $kw, my $docids = []); my $vmd = { kw => $kw }; - if ($lse_oids) { # already in lei/store - $lei->{sto}->ipc_do('set_eml', $eml, $vmd); + if (scalar @$docids) { # already in lei/store + $lei->{sto}->ipc_do('set_eml_vmd', undef, $vmd, $docids) if $c; } elsif (my $xoids = $lei->{ale}->xoids_for($eml)) { # it's in an external, only set kw, here $lei->{sto}->ipc_do('set_xvmd', $xoids, $eml, $vmd);
There's no point in adding vmd information for an external message if it was never stored and there's no vmd at all. We also don't need to check _docids_for for similar messages, since callers are expected to check lse->kw_changed first. --- lib/PublicInbox/LeiStore.pm | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 48ab1d76..f2aa45bd 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -307,19 +307,11 @@ sub set_xvmd { } return unless scalar(keys(%$xoids)); - # see if it was indexed, but with different OID(s) - if (my @docids = _docids_for($self, $eml)) { - for my $docid (@docids) { - next if $seen{$docid}; - for my $oid (keys %$xoids) { - $oidx->add_xref3($docid, -1, $oid, '.'); - } - my $idx = $eidx->idx_shard($docid); - $idx->ipc_do('set_vmd', $docid, $vmd); - } - return; - } - # totally unseen + # n.b. we don't do _docids_for here, we expect the caller + # already checked $lse->kw_changed before calling this sub + + return unless (@{$vmd->{kw} // []}) || (@{$vmd->{L} // []}); + # totally unseen: my ($smsg, $idx) = _external_only($self, $xoids, $eml); $idx->ipc_do('add_vmd', $smsg->{num}, $vmd); }
lei_store contents aren't intended to become public, so there's no point in nagging users for their email address for git committer information like git does. --- lib/PublicInbox/LeiStore.pm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index f2aa45bd..8574d736 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -51,8 +51,9 @@ sub git_epoch_max { sub git_ident ($) { my ($git) = @_; - chomp(my $i = $git->qx(qw(var GIT_COMMITTER_IDENT))); - warn "$git->{git_dir} GIT_COMMITTER_IDENT failed\n" if $?; + my $rdr = {}; + open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; + chomp(my $i = $git->qx([qw(var GIT_COMMITTER_IDENT)], undef, $rdr)); $i =~ /\A(.+) <([^>]+)> [0-9]+ [-\+]?[0-9]+$/ ? ($1, $2) : ('lei user', 'x@example.com') }
Per-message warnings are needless noise when doing augment and output preparation, and they show up way too late and out-of-band with lei-daemon. --- lib/PublicInbox/LeiStore.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 8574d736..d2dd4e7b 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -11,6 +11,7 @@ use strict; use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; +use PublicInbox::Eml; use PublicInbox::Import; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; @@ -343,6 +344,7 @@ sub ipc_atfork_child { my ($self) = @_; my $lei = $self->{lei}; $lei->_lei_atfork_child(1) if $lei; + $SIG{__WARN__} = PublicInbox::Eml::warn_ignore_cb(); $self->SUPER::ipc_atfork_child; }