From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 4/7] searchidxshard: replace index_raw with index_eml
Date: Sun, 3 Jan 2021 02:06:14 +0000 [thread overview]
Message-ID: <20210103020617.15719-5-e@80x24.org> (raw)
In-Reply-To: <20210103020617.15719-1-e@80x24.org>
Since Storable and Sereal are designed for lossless
serialization, we'll just pass $eml objects to whatever process
is running SearchIdx.
---
lib/PublicInbox/ExtSearchIdx.pm | 4 ++--
lib/PublicInbox/LeiStore.pm | 3 ++-
lib/PublicInbox/SearchIdxShard.pm | 9 ++-------
lib/PublicInbox/V2Writable.pm | 11 +++++------
4 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 064d9939..d55d3db9 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -135,7 +135,7 @@ sub index_unseen ($) {
my $oid = $new_smsg->{blob};
my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
$self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
- $idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key);
+ $idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
check_batch_limit($req);
}
@@ -437,7 +437,7 @@ sub _reindex_finalize ($$$) {
my $top_smsg = pop @$stable;
$top_smsg == $smsg or die 'BUG: top_smsg != smsg';
my $ibx = _ibx_for($self, $sync, $smsg);
- $idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key);
+ $idx->index_eml($eml, $smsg, $ibx->eidx_key);
for my $x (reverse @$stable) {
$ibx = _ibx_for($self, $sync, $x);
my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index d686e95a..4f77e8fa 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -199,6 +199,7 @@ sub add_eml {
$im->add($eml, undef, $smsg) or return; # duplicate returns undef
my $msgref = delete $smsg->{-raw_email};
$smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
+ undef $msgref;
local $self->{current_info} = $smsg->{blob};
if (my @docids = _docids_for($self, $eml)) {
@@ -215,7 +216,7 @@ sub add_eml {
$oidx->add_overview($eml, $smsg);
$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
my $idx = $eidx->idx_shard($smsg->{num});
- $idx->index_raw($msgref, $eml, $smsg);
+ $idx->index_eml($eml, $smsg);
$idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
$smsg;
}
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 43dad959..83cbbb25 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -43,13 +43,8 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop
PublicInbox::OnDestroy->new($$, \&_worker_done, $self);
}
-sub index_raw {
- my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
- if ($eml) {
- undef($$msgref) if $msgref;
- } else { # --xapian-only + --sequential-shard:
- $eml = PublicInbox::Eml->new($msgref);
- }
+sub index_eml {
+ my ($self, $eml, $smsg, $eidx_key) = @_;
$smsg->{eidx_key} = $eidx_key if defined $eidx_key;
$self->ipc_do('add_message', $eml, $smsg);
}
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 885edbe9..7b6b93a0 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -140,11 +140,11 @@ sub idx_shard ($$) {
# indexes a message, returns true if checkpointing is needed
sub do_idx ($$$$) {
- my ($self, $msgref, $mime, $smsg) = @_;
+ my ($self, $msgref, $eml, $smsg) = @_;
$smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
- $self->{oidx}->add_overview($mime, $smsg);
+ $self->{oidx}->add_overview($eml, $smsg);
my $idx = idx_shard($self, $smsg->{num});
- $idx->index_raw($msgref, $mime, $smsg);
+ $idx->index_eml($eml, $smsg);
my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
$n >= $self->{batch_bytes};
}
@@ -173,8 +173,7 @@ sub _add {
$cmt = $im->get_mark($cmt);
$self->{last_commit}->[$self->{epoch_max}] = $cmt;
- my $msgref = delete $smsg->{-raw_email};
- if (do_idx($self, $msgref, $mime, $smsg)) {
+ if (do_idx($self, delete $smsg->{-raw_email}, $mime, $smsg)) {
$self->checkpoint;
}
@@ -1219,7 +1218,7 @@ sub index_xap_only { # git->cat_async callback
my $self = $smsg->{self};
my $idx = idx_shard($self, $smsg->{num});
$smsg->{raw_bytes} = $size;
- $idx->index_raw($bref, undef, $smsg);
+ $idx->index_eml(PublicInbox::Eml->new($bref), $smsg);
$self->{transact_bytes} += $size;
}
next prev parent reply other threads:[~2021-01-03 2:06 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-03 2:06 [PATCH 0/7] v2: swap in new IPC package Eric Wong
2021-01-03 2:06 ` [PATCH 1/7] ipc: some documentation comments Eric Wong
2021-01-03 2:06 ` [PATCH 2/7] searchidxshard: use PublicInbox::IPC to kill lots of code Eric Wong
2021-01-03 2:06 ` [PATCH 3/7] searchidxshard: IPC conversion, part 2 Eric Wong
2021-01-03 2:06 ` Eric Wong [this message]
2021-01-03 2:06 ` [PATCH 5/7] use Eml (or MIME) objects for all indexing paths Eric Wong
2021-01-03 2:06 ` [PATCH 6/7] ipc: switch to one-way pipes Eric Wong
2021-01-03 2:06 ` [PATCH 7/7] searchidxshard: use add_xapian directly for v2 Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210103020617.15719-5-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).