From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0C7FD1F9FD for ; Fri, 26 Mar 2021 09:51:27 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 1/4] lei q: skip lei/store->write_prepare for JSON outputs Date: Fri, 26 Mar 2021 09:51:23 +0000 Message-Id: <20210326095126.8184-2-e@80x24.org> In-Reply-To: <20210326095126.8184-1-e@80x24.org> References: <20210326095126.8184-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: JSON outputs won't write to lei/store at all, so there's no point in forking the store worker if it's not already running. LeiSearch object ($lse) is also fork-safe until it opens a persistent FD for Xapian/SQLite so we can unconditionally carry it across fork. --- lib/PublicInbox/LeiOverview.pm | 4 ++-- lib/PublicInbox/LeiQuery.pm | 16 ++++++++-------- lib/PublicInbox/LeiToMail.pm | 16 ++++++++-------- lib/PublicInbox/LeiXSearch.pm | 1 + 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/lib/PublicInbox/LeiOverview.pm b/lib/PublicInbox/LeiOverview.pm index b4d81328..96bfff24 100644 --- a/lib/PublicInbox/LeiOverview.pm +++ b/lib/PublicInbox/LeiOverview.pm @@ -223,7 +223,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually } } elsif ($self->{fmt} =~ /\A(concat)?json\z/ && $lei->{opt}->{pretty}) { my $EOR = ($1//'') eq 'concat' ? "\n}" : "\n},"; - my $lse = $lei->{sto}->search; + my $lse = $lei->{lse}; sub { # DIY prettiness :P my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); @@ -247,7 +247,7 @@ sub ovv_each_smsg_cb { # runs in wq worker usually } } elsif ($json) { my $ORS = $self->{fmt} eq 'json' ? ",\n" : "\n"; # JSONL - my $lse = $lei->{sto}->search; + my $lse = $lei->{lse}; sub { my ($smsg, $mitem) = @_; return if $dedupe->is_smsg_dup($smsg); diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm index 84996e7e..65aa9e87 100644 --- a/lib/PublicInbox/LeiQuery.pm +++ b/lib/PublicInbox/LeiQuery.pm @@ -25,8 +25,7 @@ sub qstr_add { # PublicInbox::InputPipe::consume callback for --stdin my ($self) = @_; # $_[1] = $rbuf if (defined($_[1])) { $_[1] eq '' and return eval { - my $lse = delete $self->{lse}; - $lse->query_approxidate($lse->git, + $self->{lse}->query_approxidate($self->{lse}->git, $self->{mset_opt}->{qstr}); _start_query($self); }; @@ -50,11 +49,7 @@ sub lei_q { # --local is enabled by default unless --only is used # we'll allow "--only $LOCATION --local" my $sto = $self->_lei_store(1); - if (($opt->{'import-remote'} //= 1) | - (($opt->{'import-before'} //= \1) ? 1 : 0)) { - $sto->write_prepare($self); - } - my $lse = $sto->search; + my $lse = $self->{lse} = $sto->search; if ($opt->{'local'} //= scalar(@only) ? 0 : 1) { $lxs->prepare_external($lse); } @@ -103,6 +98,12 @@ sub lei_q { return $self->fail("`$mj' writer jobs must be >= 1"); } PublicInbox::LeiOverview->new($self) or return; + if ($self->{l2m} && ($opt->{'import-remote'} //= 1) | + # we use \1 (a ref) to distinguish between + # user-supplied and default value + (($opt->{'import-before'} //= \1) ? 1 : 0)) { + $sto->write_prepare($self); + } $self->{l2m} and $self->{l2m}->{-wq_nr_workers} = $mj // do { $mj = POSIX::lround($nproc * 3 / 4); # keep some CPU for git $mj <= 0 ? 1 : $mj; @@ -131,7 +132,6 @@ sub lei_q { no query allowed on command-line with --stdin require PublicInbox::InputPipe; - $self->{lse} = $lse; # for query_approxidate PublicInbox::InputPipe::consume($self->{0}, \&qstr_add, $self); return; } diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 1be15707..f71f74cc 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -263,7 +263,7 @@ sub _mbox_write_cb ($$) { my $atomic_append = !defined($ovv->{lock_path}); my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe; - my $lse = $lei->{sto} ? $lei->{sto}->search : undef; + my $lse = $lei->{lse}; # may be undef sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $eml //= PublicInbox::Eml->new($buf); @@ -352,7 +352,7 @@ sub _maildir_write_cb ($$) { my $dedupe = $lei->{dedupe}; $dedupe->prepare_dedupe if $dedupe; my $dst = $lei->{ovv}->{dst}; - my $lse = $lei->{sto} ? $lei->{sto}->search : undef; + my $lse = $lei->{lse}; # may be undef sub { # for git_to_mail my ($buf, $smsg, $eml) = @_; $dst // return $lei->fail; # dst may be undef-ed in last run @@ -373,7 +373,7 @@ sub _imap_write_cb ($$) { my $imap_append = $lei->{net}->can('imap_append'); my $mic = $lei->{net}->mic_get($self->{uri}); my $folder = $self->{uri}->mailbox; - my $lse = $lei->{sto} ? $lei->{sto}->search : undef; + my $lse = $lei->{lse}; # may be undef sub { # for git_to_mail my ($bref, $smsg, $eml) = @_; $mic // return $lei->fail; # dst may be undef-ed in last run @@ -449,7 +449,7 @@ sub _pre_augment_maildir { sub _do_augment_maildir { my ($self, $lei) = @_; my $dst = $lei->{ovv}->{dst}; - my $lse = $lei->{sto}->search if $lei->{opt}->{'import-before'}; + my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef; my ($mod, $shard) = @{$self->{shard_info} // []}; if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; @@ -481,7 +481,7 @@ sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb sub _do_augment_imap { my ($self, $lei) = @_; my $net = $lei->{net}; - my $lse = $lei->{sto}->search if $lei->{opt}->{'import-before'}; + my $lse = $lei->{opt}->{'import-before'} ? $lei->{lse} : undef; if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { @@ -523,9 +523,9 @@ sub _pre_augment_mbox { die "seek($dst): $!\n"; } if (!$self->{seekable}) { - my $ia = $lei->{opt}->{'import-before'}; + my $imp_before = $lei->{opt}->{'import-before'}; die "--import-before specified but $dst is not seekable\n" - if $ia && !ref($ia); + if $imp_before && !ref($imp_before); die "--augment specified but $dst is not seekable\n" if $lei->{opt}->{augment}; } @@ -562,7 +562,7 @@ sub _do_augment_mbox { $dedupe->prepare_dedupe if $dedupe; } if ($opt->{'import-before'}) { # the default - my $lse = $lei->{sto}->search; + my $lse = $lei->{lse}; PublicInbox::MboxReader->$fmt($rd, \&_mbox_augment_kw_maybe, $lei, $lse, $opt->{augment}); if (!$opt->{augment} and !truncate($out, 0)) { diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index f64b2c62..6410e0ea 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -430,6 +430,7 @@ sub do_query { $lei->{1}->autoflush(1); $lei->start_pager if delete $lei->{need_pager}; $lei->{ovv}->ovv_begin($lei); + die 'BUG: xdb|over open' if $lei->{lse}->{xdb} || $lei->{lse}->{over}; if ($l2m) { $l2m->pre_augment($lei); if ($lei->{opt}->{augment} && delete $lei->{early_mua}) {