From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D8FD51FAF0 for ; Wed, 28 Feb 2018 23:42:08 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 14/21] searchidx: index values in the threader Date: Wed, 28 Feb 2018 23:41:55 +0000 Message-Id: <20180228234202.8839-15-e@80x24.org> In-Reply-To: <20180228234202.8839-1-e@80x24.org> References: <20180228234202.8839-1-e@80x24.org> List-Id: We will need timestamp, YYYYMMDD, article number, byte count, and line count for querying thread information (including XOVER for NNTP). --- lib/PublicInbox/SearchIdx.pm | 24 ++++++++++++++---------- lib/PublicInbox/SearchIdxThread.pm | 19 ++++++++++++------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 00b24d6..b5d43d1 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -141,18 +141,20 @@ sub add_val ($$$) { $doc->add_value($col, $num); } -sub add_values ($$$$) { - my ($smsg, $bytes, $num, $lines) = @_; +sub add_values ($$) { + my ($doc, $values) = @_; - my $ts = $smsg->ts; - my $doc = $smsg->{doc}; - add_val($doc, &PublicInbox::Search::TS, $ts); + my $ts = $values->[PublicInbox::Search::TS]; + add_val($doc, PublicInbox::Search::TS, $ts); - defined($num) and add_val($doc, &PublicInbox::Search::NUM, $num); + my $num = $values->[PublicInbox::Search::NUM]; + defined($num) and add_val($doc, PublicInbox::Search::NUM, $num); - defined($bytes) and add_val($doc, &PublicInbox::Search::BYTES, $bytes); + my $bytes = $values->[PublicInbox::Search::BYTES]; + defined($bytes) and add_val($doc, PublicInbox::Search::BYTES, 
$bytes); - add_val($doc, &PublicInbox::Search::LINES, $lines); + my $lines = $values->[PublicInbox::Search::LINES]; + add_val($doc, PublicInbox::Search::LINES, $lines); my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); add_val($doc, PublicInbox::Search::YYYYMMDD, $yyyymmdd); @@ -307,7 +309,8 @@ sub add_message { } my $lines = $mime->body_raw =~ tr!\n!\n!; - add_values($smsg, $bytes, $num, $lines); + my @values = ($smsg->ts, $num, $bytes, $lines); + add_values($doc, \@values); my $tg = $self->term_generator; @@ -360,7 +363,8 @@ sub add_message { my $refs = parse_references($smsg); my $data = $smsg->to_doc_data($blob); if ($threader) { - $threader->thread_msg($mid, $smsg->ts, $xpath, $data); + push @values, $mid, $xpath, $data; + $threader->thread_msg(\@values); } else { link_message($self, $smsg, $refs, $old_tid); } diff --git a/lib/PublicInbox/SearchIdxThread.pm b/lib/PublicInbox/SearchIdxThread.pm index 57bb293..6b50eb0 100644 --- a/lib/PublicInbox/SearchIdxThread.pm +++ b/lib/PublicInbox/SearchIdxThread.pm @@ -61,30 +61,34 @@ sub thread_worker_loop { $xdb->begin_transaction; $txn = 1; } - eval { $self->thread_msg_real(@$msg) }; - warn "failed to index message <$msg->[0]>: $@\n" if $@; + eval { $self->thread_msg_real($msg) }; + warn "failed to index message <$msg->[-1]>: $@\n" if $@; } } } # called by a partition worker sub thread_msg { - my ($self, $mid, $ts, $xpath, $doc_data) = @_; + my ($self, $values) = @_; my $w = $self->{w}; my $err; - my $str = freeze([ $mid, $ts, $xpath, $doc_data ]); - my $len = length($str) . "\n"; + my $str = freeze($values); + $str = length($str) . "\n" . 
$str; # multiple processes write to the same pipe, so use flock $self->_lock_acquire; - print $w $len, $str or $err = $!; + print $w $str or $err = $!; $self->_lock_release; die "print failed: $err\n" if $err; } sub thread_msg_real { - my ($self, $mid, $ts, $xpath, $doc_data) = @_; + my ($self, $values) = @_; + my $doc_data = pop @$values; + my $xpath = pop @$values; + my $mid = pop @$values; + my $ts = $values->[PublicInbox::Search::TS]; my $smsg = $self->lookup_message($mid); my ($old_tid, $doc_id); if ($smsg) { @@ -99,6 +103,7 @@ sub thread_msg_real { my $doc = $smsg->{doc}; $doc->add_term('XPATH' . $xpath) if defined $xpath; $doc->add_term('XMID' . $mid); + PublicInbox::SearchIdx::add_values($doc, $values); $doc->set_data($doc_data); $smsg->{ts} = $ts; $smsg->load_from_data($doc_data); -- EW