From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B2AF0209F4; Tue, 20 Dec 2016 03:04:02 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Eric Wong Subject: [PATCH 2/2] searchmsg: remove ensure_metadata Date: Tue, 20 Dec 2016 03:03:57 +0000 Message-Id: <20161220030357.26350-3-e@80x24.org> In-Reply-To: <20161220030357.26350-1-e@80x24.org> References: <20161220030357.26350-1-e@80x24.org> List-Id: Instead, only preload the ->mid field for threading, as we only need ->thread and ->path once in Search->get_thread (but we will need the ->mid field repeatedly). This more than doubles View->load_results performance according to thread-all on an inbox with over 300K messages. --- lib/PublicInbox/Search.pm | 6 ------ lib/PublicInbox/SearchMsg.pm | 39 ++++++++++++--------------------------- lib/PublicInbox/View.pm | 2 +- t/search.t | 2 -- 4 files changed, 13 insertions(+), 36 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 24cb266..d4f6f77 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -108,12 +108,6 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix); sub xpfx { $all_pfx{$_[0]} } -our %PFX2TERM_RMAP; -my %meta_pfx = (mid => 1, thread => 1, path => 1); -while (my ($k, $v) = each %all_pfx) { - $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k}; -} - my $mail_query = Search::Xapian::Query->new(xpfx('type') . 
'mail'); sub xdir { diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index d62f02c..96406c6 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -10,7 +10,6 @@ use Search::Xapian; use Date::Parse qw/str2time/; use PublicInbox::MID qw/mid_clean/; use PublicInbox::Address; -our $PFX2TERM_RE = undef; sub new { my ($class, $mime) = @_; @@ -121,29 +120,17 @@ sub references { defined $x ? $x : ''; } -sub ensure_metadata { - my ($self) = @_; +sub _get_term_val ($$$) { + my ($self, $pfx, $re) = @_; my $doc = $self->{doc}; my $end = $doc->termlist_end; - - unless (defined $PFX2TERM_RE) { - my $or = join('|', keys %PublicInbox::Search::PFX2TERM_RMAP); - $PFX2TERM_RE = qr/\A($or)/; - } - - while (my ($pfx, $field) = each %PublicInbox::Search::PFX2TERM_RMAP) { - # ideally we'd move this out of the loop: - my $i = $doc->termlist_begin; - - $i->skip_to($pfx); - if ($i != $end) { - my $val = $i->get_termname; - - if ($val =~ s/$PFX2TERM_RE//o) { - $self->{$field} = $val; - } - } + my $i = $doc->termlist_begin; + $i->skip_to($pfx); + if ($i != $end) { + my $val = $i->get_termname; + $val =~ s/$re// and return $val; } + undef; } sub mid ($;$) { @@ -154,8 +141,8 @@ sub mid ($;$) { } elsif (my $rv = $self->{mid}) { $rv; } else { - $self->ensure_metadata; # needed for ghosts - $self->{mid} ||= $self->_extract_mid; + $self->{mid} = _get_term_val($self, 'Q', qr/\AQ/) || + $self->_extract_mid; } } @@ -194,16 +181,14 @@ sub thread_id { my ($self) = @_; my $tid = $self->{thread}; return $tid if defined $tid; - $self->ensure_metadata; - $self->{thread}; + $self->{thread} = _get_term_val($self, 'G', qr/\AG/); # *G*roup } sub path { my ($self) = @_; my $path = $self->{path}; return $path if defined $path; - $self->ensure_metadata; - $self->{path}; + $self->{path} = _get_term_val($self, 'XPATH', qr/\AXPATH/); # path } 1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index fa47a16..a50cb64 100644 --- a/lib/PublicInbox/View.pm 
+++ b/lib/PublicInbox/View.pm @@ -737,7 +737,7 @@ sub indent_for { sub load_results { my ($srch, $sres) = @_; my $msgs = delete $sres->{msgs}; - $srch->retry_reopen(sub { [ map { $_->ensure_metadata; $_ } @$msgs ] }); + $srch->retry_reopen(sub { [ map { $_->mid; $_ } @$msgs ] }); } sub msg_timestamp { diff --git a/t/search.t b/t/search.t index eed9c9b..c16811d 100644 --- a/t/search.t +++ b/t/search.t @@ -109,7 +109,6 @@ sub filter_mids { my $found = $ro->lookup_message(''); ok($found, "message found"); is($root_id, $found->{doc_id}, 'doc_id set correctly'); - $found->ensure_metadata; is($found->mid, 'root@s', 'mid set correctly'); ok(int($found->thread_id) > 0, 'thread_id is an integer'); @@ -290,7 +289,6 @@ sub filter_mids { body => "LOOP!\n")); ok($doc_id > 0, "doc_id defined with circular reference"); my $smsg = $rw->lookup_message('circle@a'); - $smsg->ensure_metadata; is($smsg->references, '', "no references created"); my $msg = PublicInbox::SearchMsg->load_doc($smsg->{doc}); is($s, $msg->subject, 'long subject not rewritten'); -- EW