* [PATCH 2/2] searchmsg: remove ensure_metadata
2016-12-20 3:03 [PATCH 0/2] improve threading performance Eric Wong
2016-12-20 3:03 ` [PATCH 1/2] tests: add thread-all testing for benchmarking Eric Wong
@ 2016-12-20 3:03 ` Eric Wong
1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2016-12-20 3:03 UTC (permalink / raw)
To: meta; +Cc: Eric Wong
Instead, only preload the ->mid field for threading,
as we only need ->thread and ->path once in Search->get_thread
(but we will need the ->mid field repeatedly).
This more than doubles View->load_results performance
according to thread-all on an inbox with over 300K messages.
---
lib/PublicInbox/Search.pm | 6 ------
lib/PublicInbox/SearchMsg.pm | 39 ++++++++++++---------------------------
lib/PublicInbox/View.pm | 2 +-
t/search.t | 2 --
4 files changed, 13 insertions(+), 36 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 24cb266..d4f6f77 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
sub xpfx { $all_pfx{$_[0]} }
-our %PFX2TERM_RMAP;
-my %meta_pfx = (mid => 1, thread => 1, path => 1);
-while (my ($k, $v) = each %all_pfx) {
- $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
-}
-
my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
sub xdir {
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index d62f02c..96406c6 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -10,7 +10,6 @@ use Search::Xapian;
use Date::Parse qw/str2time/;
use PublicInbox::MID qw/mid_clean/;
use PublicInbox::Address;
-our $PFX2TERM_RE = undef;
sub new {
my ($class, $mime) = @_;
@@ -121,29 +120,17 @@ sub references {
defined $x ? $x : '';
}
-sub ensure_metadata {
- my ($self) = @_;
+sub _get_term_val ($$$) {
+ my ($self, $pfx, $re) = @_;
my $doc = $self->{doc};
my $end = $doc->termlist_end;
-
- unless (defined $PFX2TERM_RE) {
- my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP);
- $PFX2TERM_RE = qr/\A($or)/;
- }
-
- while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) {
- # ideally we'd move this out of the loop:
- my $i = $doc->termlist_begin;
-
- $i->skip_to($pfx);
- if ($i != $end) {
- my $val = $i->get_termname;
-
- if ($val =~ s/$PFX2TERM_RE//o) {
- $self->{$field} = $val;
- }
- }
+ my $i = $doc->termlist_begin;
+ $i->skip_to($pfx);
+ if ($i != $end) {
+ my $val = $i->get_termname;
+ $val =~ s/$re// and return $val;
}
+ undef;
}
sub mid ($;$) {
@@ -154,8 +141,8 @@ sub mid ($;$) {
} elsif (my $rv = $self->{mid}) {
$rv;
} else {
- $self->ensure_metadata; # needed for ghosts
- $self->{mid} ||= $self->_extract_mid;
+ $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) ||
+ $self->_extract_mid;
}
}
@@ -194,16 +181,14 @@ sub thread_id {
my ($self) = @_;
my $tid = $self->{thread};
return $tid if defined $tid;
- $self->ensure_metadata;
- $self->{thread};
+ $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup
}
sub path {
my ($self) = @_;
my $path = $self->{path};
return $path if defined $path;
- $self->ensure_metadata;
- $self->{path};
+ $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path
}
1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index fa47a16..a50cb64 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -737,7 +737,7 @@ sub indent_for {
sub load_results {
my ($srch, $sres) = @_;
my $msgs = delete $sres->{msgs};
- $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] });
+ $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] });
}
sub msg_timestamp {
diff --git a/t/search.t b/t/search.t
index eed9c9b..c16811d 100644
--- a/t/search.t
+++ b/t/search.t
@@ -109,7 +109,6 @@ sub filter_mids {
my $found = $ro->lookup_message('<root@s>');
ok($found, "message found");
is($root_id, $found->{doc_id}, 'doc_id set correctly');
- $found->ensure_metadata;
is($found->mid, 'root@s', 'mid set correctly');
ok(int($found->thread_id) > 0, 'thread_id is an integer');
@@ -290,7 +289,6 @@ sub filter_mids {
body => "LOOP!\n"));
ok($doc_id > 0, "doc_id defined with circular reference");
my $smsg = $rw->lookup_message('circle@a');
- $smsg->ensure_metadata;
is($smsg->references, '', "no references created");
my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc});
is($s, $msg->subject, 'long subject not rewritten');
--
EW
^ permalink raw reply related [flat|nested] 3+ messages in thread