unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH 0/2] search: support YYYYMMDD search ranges
@ 2016-08-16  8:49 Eric Wong
  2016-08-16  8:49 ` [PATCH 1/2] search: drop pointless range processors for Unix timestamp Eric Wong
  2016-08-16  8:49 ` [PATCH 2/2] search: add YYYYMMDD search range via "d:" prefix Eric Wong
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Wong @ 2016-08-16  8:49 UTC (permalink / raw)
  To: meta

Not deployed to clear-net sites yet; I'm reindexing the
http://czquwvybam4bgbro.onion/git/ onion right now.

Eric Wong (2):
      search: drop pointless range processors for Unix timestamp
      search: add YYYYMMDD search range via "d:" prefix

 lib/PublicInbox/Search.pm    | 13 +++----------
 lib/PublicInbox/SearchIdx.pm |  7 ++++++-
 t/search.t                   |  9 +++++++++
 3 files changed, 18 insertions(+), 11 deletions(-)


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] search: drop pointless range processors for Unix timestamp
  2016-08-16  8:49 [PATCH 0/2] search: support YYYYMMDD search ranges Eric Wong
@ 2016-08-16  8:49 ` Eric Wong
  2016-08-16  8:49 ` [PATCH 2/2] search: add YYYYMMDD search range via "d:" prefix Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2016-08-16  8:49 UTC (permalink / raw)
  To: meta

The Unix timestamp isn't meaningful for users searching;
we will start indexing the YYYYMMDD date stamp, which may
use StringValueRangeProcessor instead.
---
 lib/PublicInbox/Search.pm | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 1398ea4..61f0338 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -179,8 +179,6 @@ sub qp {
 	$qp->set_database($self->{xdb});
 	$qp->set_stemmer($self->stemmer);
 	$qp->set_stemming_strategy(STEM_SOME);
-	$qp->add_valuerangeprocessor($self->ts_range_processor);
-	$qp->add_valuerangeprocessor($self->date_range_processor);
 
 	while (my ($name, $prefix) = each %bool_pfx_external) {
 		$qp->add_boolean_prefix($name, $prefix);
@@ -205,14 +203,6 @@ sub qp {
 	$self->{query_parser} = $qp;
 }
 
-sub ts_range_processor {
-	$_[0]->{tsrp} ||= Search::Xapian::NumberValueRangeProcessor->new(TS);
-}
-
-sub date_range_processor {
-	$_[0]->{drp} ||= Search::Xapian::DateValueRangeProcessor->new(TS);
-}
-
 sub num_range_processor {
 	$_[0]->{nrp} ||= Search::Xapian::NumberValueRangeProcessor->new(NUM);
 }
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] search: add YYYYMMDD search range via "d:" prefix
  2016-08-16  8:49 [PATCH 0/2] search: support YYYYMMDD search ranges Eric Wong
  2016-08-16  8:49 ` [PATCH 1/2] search: drop pointless range processors for Unix timestamp Eric Wong
@ 2016-08-16  8:49 ` Eric Wong
  1 sibling, 0 replies; 3+ messages in thread
From: Eric Wong @ 2016-08-16  8:49 UTC (permalink / raw)
  To: meta

This is similar to mairix in that it uses a "d:" prefix, but
it only takes YYYYMMDD for now.  Using custom date/time parsers
via Perl will be much more work:

	nntp://news.gmane.org/20151005222157.GE5880@survex.com

Anyhow, this ought to be more human-friendly than searching by
Unix timestamps, but it requires reindexing to take advantage of.
---
 lib/PublicInbox/Search.pm    | 3 +++
 lib/PublicInbox/SearchIdx.pm | 7 ++++++-
 t/search.t                   | 9 +++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 61f0338..7561ef4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -12,6 +12,7 @@ use constant TS => 0; # timestamp
 use constant NUM => 1; # NNTP article number
 use constant BYTES => 2; # :bytes as defined in RFC 3977
 use constant LINES => 3; # :lines as defined in RFC 3977
+use constant YYYYMMDD => 4; # for searching in the WWW UI
 
 use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
@@ -179,6 +180,8 @@ sub qp {
 	$qp->set_database($self->{xdb});
 	$qp->set_stemmer($self->stemmer);
 	$qp->set_stemming_strategy(STEM_SOME);
+	$qp->add_valuerangeprocessor(
+		Search::Xapian::StringValueRangeProcessor->new(YYYYMMDD, 'd:'));
 
 	while (my ($name, $prefix) = each %bool_pfx_external) {
 		$qp->add_boolean_prefix($name, $prefix);
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f8155ec..f54f5f2 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -17,6 +17,7 @@ use base qw(PublicInbox::Search);
 use PublicInbox::MID qw/mid_clean id_compress mid_mime/;
 use PublicInbox::MsgIter;
 use Carp qw(croak);
+use POSIX qw(strftime);
 require PublicInbox::Git;
 *xpfx = *PublicInbox::Search::xpfx;
 
@@ -128,7 +129,8 @@ sub add_message {
 			$doc->add_term(xpfx('path') . id_compress($path));
 		}
 
-		add_val($doc, &PublicInbox::Search::TS, $smsg->ts);
+		my $ts = $smsg->ts;
+		add_val($doc, &PublicInbox::Search::TS, $ts);
 
 		defined($num) and
 			add_val($doc, &PublicInbox::Search::NUM, $num);
@@ -139,6 +141,9 @@ sub add_message {
 		add_val($doc, &PublicInbox::Search::LINES,
 				$mime->body_raw =~ tr!\n!\n!);
 
+		my $yyyymmdd = strftime('%Y%m%d', gmtime($ts));
+		$doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd);
+
 		my $tg = $self->term_generator;
 
 		$tg->set_document($doc);
diff --git a/t/search.t b/t/search.t
index 2685348..db94c0a 100644
--- a/t/search.t
+++ b/t/search.t
@@ -315,6 +315,15 @@ sub filter_mids {
 		'UTF-8 subject preserved');
 }
 
+{
+	my $res = $ro->query('d:19931002..20101002');
+	ok(scalar @{$res->{msgs}} > 0, 'got results within range');
+	$res = $ro->query('d:20101003..');
+	is(scalar @{$res->{msgs}}, 0, 'nothing after 20101003');
+	$res = $ro->query('d:..19931001');
+	is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001');
+}
+
 done_testing();
 
 1;
-- 
EW


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-08-16  8:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-16  8:49 [PATCH 0/2] search: support YYYYMMDD search ranges Eric Wong
2016-08-16  8:49 ` [PATCH 1/2] search: drop pointless range processors for Unix timestamp Eric Wong
2016-08-16  8:49 ` [PATCH 2/2] search: add YYYYMMDD search range via "d:" prefix Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).