From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 860201F626 for ; Sun, 19 Feb 2023 08:18:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1676794694; bh=72+JhjrbCFoQVcGCyRUWKW774LiVUnHgfOpLp0ngQEo=; h=From:To:Subject:Date:From; b=0jk9J8/F2C6xtHRfM9QD08TrtBA5yxKTPuym85nwuX8LKHFyoXGHP3V7W/D6zDZ+/ OUFzq7xuCjiexbw68EVSENKc8HoLdZ6DivPQKysPsJ1I0PCw7XGIIbSOoCbAIsWD4q 9FL7PwZI/TcTubNXzNhMWWA+povrWPP35rhY/rh8= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] search: translate d: to dt: in query Date: Sun, 19 Feb 2023 08:18:14 +0000 Message-Id: <20230219081814.291081-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: dt: is higher resolution and the YYYYMMDD column will be dropped if there's ever another SCHEMA_VERSION update. While the upcoming code repo index is independent of the mail schemas, it'll use similar query prefixes and likely use d:/dt: for Author Date of git commits. --- lib/PublicInbox/Search.pm | 44 +++++++++------------------------------ t/search.t | 21 +++++++++++-------- 2 files changed, 22 insertions(+), 43 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 7f470f29..e858729a 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -277,42 +277,18 @@ sub date_parse_prepare { my $end = $range =~ s/([\)\s]*)\z// ? $1 : ''; my @r = split(/\.\./, $range, 2); - # expand "d:20101002" => "d:20101002..20101003" and like + # expand "dt:2010-10-02" => "dt:2010-10-02..2010-10-03" and like # n.b. git doesn't do YYYYMMDD w/o '-', it needs YYYY-MM-DD - # We upgrade "d:" to "dt:" to iff using approxidate + # We upgrade "d:" to "dt:" unconditionally if ($pfx eq 'd') { - my $fmt = "\0%Y%m%d"; - if (!defined($r[1])) { - if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2})\z/) { - push @$to_parse, "$1-$2-$3"; - # we could've handled as-is, but we need - # to parse anyways for "d+" below - } else { - push @$to_parse, $r[0]; - if ($r[0] !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) { - $pfx = 'dt'; - $fmt = "\0%Y%m%d%H%M%S"; - } - } - $r[0] = "$fmt+$#$to_parse\0"; - $r[1] = "$fmt+\0"; - } else { - for my $x (@r) { - next if $x eq '' || $x =~ /\A[0-9]{8}\z/; - push @$to_parse, $x; - if ($x !~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/) { - $pfx = 'dt'; - } - $x = "$fmt$#$to_parse\0"; - } - if ($pfx eq 'dt') { - for (@r) { - s/\0%Y%m%d/\0%Y%m%d%H%M%S/; - s/\A([0-9]{8})\z/${1}000000/; - } - } - } - } elsif ($pfx eq 'dt') { + $pfx = 'dt'; + # upgrade YYYYMMDD to YYYYMMDDHHMMSS + $_ .= ' 00:00:00' for (grep(m!\A[0-9]{4}[^[:alnum:]] + [0-9]{2}[^[:alnum:]] + [0-9]{2}\z!x, @r)); + $_ .= '000000' for (grep(m!\A[0-9]{8}\z!, @r)); + } + if ($pfx eq 'dt') { if (!defined($r[1])) { # git needs gaps and not /\d{14}/ if ($r[0] =~ /\A([0-9]{4})([0-9]{2})([0-9]{2}) ([0-9]{2})([0-9]{2})([0-9]{2})\z/x) { diff --git a/t/search.t b/t/search.t index 13210ff5..dded6c40 100644 --- a/t/search.t +++ b/t/search.t @@ -1,8 +1,8 @@ -# Copyright (C) 2015-2021 all contributors +#!perl -w +# Copyright (C) all contributors # License: AGPL-3.0+ use strict; -use warnings; -use Test::More; +use v5.10; use PublicInbox::TestCommon; require_mods(qw(DBD::SQLite Search::Xapian)); require PublicInbox::SearchIdx; @@ -565,10 +565,13 @@ SKIP: { skip 'too close to midnight, time is tricky', 6; } $q = $s->query_argv_to_string($g, [qw(d:20101002 blah)]); - is($q, 'd:20101002..20101003 blah', 'YYYYMMDD expanded to range'); + is($q, 'dt:20101002000000..20101003000000 blah', + 'YYYYMMDD expanded to range'); $q = $s->query_argv_to_string($g, [qw(d:2010-10-02)]); - is($q, 'd:20101002..20101003', 'YYYY-MM-DD expanded to range'); + is($q, 'dt:20101002000000..20101003000000', + 'YYYY-MM-DD expanded to range'); $q = $s->query_argv_to_string($g, [qw(rt:2010-10-02.. yy)]); + diag "q=$q"; $q =~ /\Art:(\d+)\.\. yy/ or fail("rt: expansion failed: $q"); is(strftime('%Y-%m-%d', gmtime($1//0)), '2010-10-02', 'rt: beg expand'); $q = $s->query_argv_to_string($g, [qw(rt:..2010-10-02 zz)]); @@ -615,7 +618,7 @@ SKIP: { $orig = $qs = qq[f:bob "hello world" d:1993-10-02..2010-10-02]; $s->query_approxidate($g, $qs); - is($qs, qq[f:bob "hello world" d:19931002..20101002], + is($qs, qq[f:bob "hello world" dt:19931002000000..20101002000000], 'post-phrase date corrected'); # Xapian uses "" to escape " inside phrases, we don't explictly @@ -627,7 +630,7 @@ SKIP: { is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, my $tmp = "$qs d:..2010-10-02"); - is($tmp, "$orig d:..20101002", + is($tmp, "$orig dt:..20101002000000", 'two phrases did not throw off date parsing'); $orig = $qs = qq[${x}hello d:1993-10-02..$y$x world$y]; @@ -635,7 +638,7 @@ SKIP: { is($qs, $orig, 'phrases unchanged \x'.ord($x).'-\x'.ord($y)); $s->query_approxidate($g, $tmp = "$qs d:..2010-10-02"); - is($tmp, "$orig d:..20101002", + is($tmp, "$orig dt:..20101002000000", 'two phrases did not throw off date parsing'); } @@ -654,7 +657,7 @@ SKIP: { skip 'TEST_EXPENSIVE not set for argv overflow check', 1; my @w; local $SIG{__WARN__} = sub { push @w, @_ }; # for pure Perl version - my @fail = map { 'd:1993-10-02..2010-10-02' } (1..(4096 * 32)); + my @fail = map { 'dt:1993-10-02..2010-10-02' } (1..(4096 * 32)); eval { $s->query_argv_to_string($g, \@fail) }; ok($@, 'exception raised'); }