* [PATCH] search: do not index references and inreplyto terms
@ 2015-08-30 1:10 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-08-30 1:10 UTC (permalink / raw)
To: meta
We no longer need them, as we can rely on index-time thread
resolution and thread merging. This allows us to index less
data and hopefully increase efficiency.
---
lib/PublicInbox/Search.pm | 17 ++---------------
lib/PublicInbox/SearchIdx.pm | 5 -----
t/search.t | 21 +++------------------
3 files changed, 5 insertions(+), 38 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 4b3830e..d3faaeb 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -24,7 +24,8 @@ use constant {
# 4 - change "Re: " normalization, avoid circular Reference ghosts
# 5 - subject_path drops trailing '.'
# 6 - preserve References: order in document data
- SCHEMA_VERSION => 6,
+ # 7 - remove references and inreplyto terms
+ SCHEMA_VERSION => 7,
QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
};
@@ -37,8 +38,6 @@ my %bool_pfx_internal = (
my %bool_pfx_external = (
path => 'XPATH',
thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
- references => 'XREFS',
- inreplyto => 'XIRT',
);
my %prob_prefix = (
@@ -87,18 +86,6 @@ sub get_subject_path {
$self->do_enquire($query, $opts);
}
-# given a message ID, get followups to a message
-sub get_followups {
- my ($self, $mid, $opts) = @_;
- $mid = mid_clean($mid);
- $mid = mid_compress($mid);
- my $qp = $self->qp;
- my $irt = $qp->parse_query("inreplyto:$mid", 0);
- my $ref = $qp->parse_query("references:$mid", 0);
- my $query = Search::Xapian::Query->new(OP_OR, $irt, $ref);
- $self->do_enquire($query, $opts);
-}
-
sub get_thread {
my ($self, $mid, $opts) = @_;
my $smsg = eval { $self->lookup_message($mid) };
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 2ac53a7..dec3333 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -238,20 +238,15 @@ sub link_message_to_parents {
}
}
if (@refs) {
- $doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
$smsg->{references_sorted} = '<'.join('><', @refs).'>';
- my $ref_pfx = xpfx('references');
-
# first ref *should* be the thread root,
# but we can never trust clients to do the right thing
my $ref = shift @refs;
- $doc->add_term($ref_pfx . $ref);
$tid = $self->_resolve_mid_to_tid($ref);
# the rest of the refs should point to this tid:
foreach $ref (@refs) {
- $doc->add_term($ref_pfx . $ref);
my $ptid = $self->_resolve_mid_to_tid($ref);
if ($tid ne $ptid) {
$self->merge_threads($tid, $ptid);
diff --git a/t/search.t b/t/search.t
index 65539f1..02189ac 100644
--- a/t/search.t
+++ b/t/search.t
@@ -135,15 +135,6 @@ sub filter_mids {
my $second = $res->{msgs}->[0];
isnt($first, $second, "offset returned different result from limit");
-
- foreach my $f (qw(inreplyto references)) {
- $res = $ro->query($f . ':root@s');
- @res = filter_mids($res);
- is_deeply(\@res, [ 'last@s' ],
- "got expected results for $f: match");
- $res = $ro->query($f . ':root');
- is($res->{total}, 0, "no partial mid match");
- }
}
# ghost vivication
@@ -219,14 +210,8 @@ sub filter_mids {
$rw_commit->();
$ro->reopen;
- my $res = $ro->query('references:root@s');
- my @res = filter_mids($res);
- is_deeply(\@res, [ sort('last@s', $long_midc) ],
- "got expected results for references: match");
-
- my $followups = $ro->get_followups('root@s');
- $followups = [ filter_mids($followups) ];
- is_deeply($followups, [ filter_mids($res) ], "get_followups matches");
+ my $res;
+ my @res;
my $long_reply_mid = 'reply-to-long@1';
my $long_reply = Email::MIME->create(
@@ -301,7 +286,7 @@ sub filter_mids {
ok($doc_id > 0, "doc_id defined with circular reference");
my $smsg = $rw->lookup_message('circle@a');
$smsg->ensure_metadata;
- is($smsg->{references}, undef, "no references created");
+ is($smsg->references_sorted, '', "no references created");
}
done_testing();
--
EW
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2015-08-30 1:10 UTC | newest]
Thread overview: (only message) (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2015-08-30 1:10 [PATCH] search: do not index references and inreplyto terms Eric Wong
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).