unofficial mirror of meta@public-inbox.org
 help / color / mirror / Atom feed
* [PATCH] search: translate d: to dt: in query
@ 2023-02-19  8:18 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2023-02-19  8:18 UTC (permalink / raw)
  To: meta

The dt: prefix is higher resolution than d:, and the YYYYMMDD
column will be dropped if there's ever another SCHEMA_VERSION
update.  While the upcoming code repo index is independent of
the mail schemas, it'll use similar query prefixes and will
likely use d:/dt: for the Author Date of git commits.
---
 lib/PublicInbox/Search.pm | 44 +++++++++------------------------------
 t/search.t                | 21 +++++++++++--------
 2 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 7f470f29..e858729a 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -277,42 +277,18 @@ sub date_parse_prepare {
 	my $end = $range =~ s/([\)\s]*)\z// ? $1 : '';
 	my @r = split(/\.\./, $range, 2);
 
-	# expand "d:20101002" => "d:20101002..20101003" and like
+	# expand "dt:2010-10-02" => "dt:2010-10-02..2010-10-03" and like
 	# n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD
-	# We upgrade "d:" to "dt:" to iff using approxidate
+	# We upgrade "d:" to "dt:" unconditionally
 	if ($pfx eq 'd') {
-		my $fmt = "\0%Y%m%d";
-		if (!defined($r[1])) {
-			if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})\z/) {
-				push @$to_parse, "$1-$2-$3";
-				# we could've handled as-is, but we need
-				# to parse anyways for "d+" below
-			} else {
-				push @$to_parse, $r[0];
-				if ($r[0] !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) {
-					$pfx = 'dt';
-					$fmt = "\0%Y%m%d%H%M%S";
-				}
-			}
-			$r[0] = "$fmt+$#$to_parse\0";
-			$r[1] = "$fmt+\0";
-		} else {
-			for my $x (@r) {
-				next if $x eq '' || $x =~ /\A[0-9]{8}\z/;
-				push @$to_parse, $x;
-				if ($x !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) {
-					$pfx = 'dt';
-				}
-				$x = "$fmt$#$to_parse\0";
-			}
-			if ($pfx eq 'dt') {
-				for (@r) {
-					s/\0%Y%m%d/\0%Y%m%d%H%M%S/;
-					s/\A([0-9]{8})\z/${1}000000/;
-				}
-			}
-		}
-	} elsif ($pfx eq 'dt') {
+		$pfx = 'dt';
+		# upgrade YYYYMMDD to YYYYMMDDHHMMSS
+		$_ .= ' 00:00:00' for (grep(m!\A[0-9]{4}[^[:alnum:]]
+					[0-9]{2}[^[:alnum:]]
+					[0-9]{2}\z!x, @r));
+		$_ .= '000000' for (grep(m!\A[0-9]{8}\z!, @r));
+	}
+	if ($pfx eq 'dt') {
 		if (!defined($r[1])) { # git needs gaps and not /\d{14}/
 			if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})
 					([0-9]{2})([0-9]{2})([0-9]{2})\z/x) {
diff --git a/t/search.t b/t/search.t
index 13210ff5..dded6c40 100644
--- a/t/search.t
+++ b/t/search.t
@@ -1,8 +1,8 @@
-# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org>
+#!perl -w
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict;
-use warnings;
-use Test::More;
+use v5.10;
 use PublicInbox::TestCommon;
 require_mods(qw(DBD::SQLite Search::Xapian));
 require PublicInbox::SearchIdx;
@@ -565,10 +565,13 @@ SKIP: {
 		skip 'too close to midnight, time is tricky', 6;
 	}
 	$q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]);
-	is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range');
+	is($q, 'dt:20101002000000..20101003000000 blah',
+		'YYYYMMDD expanded to range');
 	$q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]);
-	is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range');
+	is($q, 'dt:20101002000000..20101003000000',
+		'YYYY-MM-DD expanded to range');
 	$q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]);
+	diag "q=$q";
 	$q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q");
 	is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand');
 	$q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]);
@@ -615,7 +618,7 @@ SKIP: {
 
 	$orig = $qs = qq[f:bob "hello world" d:1993-10-02..2010-10-02];
 	$s->query_approxidate($g, $qs);
-	is($qs, qq[f:bob "hello world" d:19931002..20101002],
+	is($qs, qq[f:bob "hello world" dt:19931002000000..20101002000000],
 		'post-phrase date corrected');
 
 	# Xapian uses "" to escape " inside phrases, we don't explictly
@@ -627,7 +630,7 @@ SKIP: {
 		is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y));
 
 		$s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02");
-		is($tmp, "$orig d:..20101002",
+		is($tmp, "$orig dt:..20101002000000",
 			'two phrases did not throw off date parsing');
 
 		$orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y];
@@ -635,7 +638,7 @@ SKIP: {
 		is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y));
 
 		$s->query_approxidate($g, $tmp = "$qs d:..2010-10-02");
-		is($tmp, "$orig d:..20101002",
+		is($tmp, "$orig dt:..20101002000000",
 			'two phrases did not throw off date parsing');
 	}
 
@@ -654,7 +657,7 @@ SKIP: {
 		skip 'TEST_EXPENSIVE not set for argv overflow check', 1;
 	my @w;
 	local $SIG{__WARN__} = sub { push @w, @_ }; # for pure Perl version
-	my @fail = map { 'd:1993-10-02..2010-10-02' } (1..(4096 * 32));
+	my @fail = map { 'dt:1993-10-02..2010-10-02' } (1..(4096 * 32));
 	eval { $s->query_argv_to_string($g, \@fail) };
 	ok($@, 'exception raised');
 }

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2023-02-19  8:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-19  8:18 [PATCH] search: translate d: to dt: in query Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).