unofficial mirror of notmuch@notmuchmail.org
 help / color / mirror / code / Atom feed
* [RFC PATCH 0/2] natural language date range search
@ 2012-02-19 22:55 Jani Nikula
  2012-02-19 22:55 ` [RFC PATCH 1/2] lib: add date/time parser Jani Nikula
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Jani Nikula @ 2012-02-19 22:55 UTC (permalink / raw)
  To: notmuch

Hi all, these patches add support for natural language date range search
of the form date:since..until, where since and until can be fairly free
form date/time expressions in English.

Examples:

date:two-days..yesterday (all mail in the two days before today)
date:12h.. (all mail since 12 hrs ago)
date:november..november (all mail in previous november)
date:2011.. (all mail since the beginning of 2011)
date:last-week..this-week (all mail over last and current week)
date:5/10/2011-12:34:55..10pm_2012-01-14

Plus plenty more and combinations of the above.

The repository for the date/time parser with a command line tool is at
[1], and there's a README [2] with a bunch of details too.


BR,
Jani.


[1] https://gitorious.org/parse-time-string/parse-time-string
[2] https://gitorious.org/parse-time-string/parse-time-string/blobs/master/README


Jani Nikula (2):
  lib: add date/time parser
  lib: add date range search

 lib/Makefile.local      |    2 +
 lib/database-private.h  |    1 +
 lib/database.cc         |    4 +
 lib/getdate-proc.cc     |   34 ++
 lib/getdate-proc.h      |   21 +
 lib/parse-time-string.c | 1304 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/parse-time-string.h |   95 ++++
 7 files changed, 1461 insertions(+), 0 deletions(-)
 create mode 100644 lib/getdate-proc.cc
 create mode 100644 lib/getdate-proc.h
 create mode 100644 lib/parse-time-string.c
 create mode 100644 lib/parse-time-string.h

-- 
1.7.5.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH 1/2] lib: add date/time parser
  2012-02-19 22:55 [RFC PATCH 0/2] natural language date range search Jani Nikula
@ 2012-02-19 22:55 ` Jani Nikula
  2012-02-26  8:45   ` Mark Walters
  2012-02-19 22:55 ` [RFC PATCH 2/2] lib: add date range search Jani Nikula
  2012-02-25 15:05 ` [RFC PATCH 0/2] natural language " Tomi Ollila
  2 siblings, 1 reply; 8+ messages in thread
From: Jani Nikula @ 2012-02-19 22:55 UTC (permalink / raw)
  To: notmuch

Signed-off-by: Jani Nikula <jani@nikula.org>
---
 lib/Makefile.local      |    1 +
 lib/parse-time-string.c | 1304 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/parse-time-string.h |   95 ++++
 3 files changed, 1400 insertions(+), 0 deletions(-)
 create mode 100644 lib/parse-time-string.c
 create mode 100644 lib/parse-time-string.h

diff --git a/lib/Makefile.local b/lib/Makefile.local
index 54c4dea..803a284 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -53,6 +53,7 @@ libnotmuch_c_srcs =		\
 	$(dir)/libsha1.c	\
 	$(dir)/message-file.c	\
 	$(dir)/messages.c	\
+	$(dir)/parse-time-string.c	\
 	$(dir)/sha1.c		\
 	$(dir)/tags.c
 
diff --git a/lib/parse-time-string.c b/lib/parse-time-string.c
new file mode 100644
index 0000000..59713dc
--- /dev/null
+++ b/lib/parse-time-string.c
@@ -0,0 +1,1304 @@
+/*
+ * parse time string - user friendly date and time parser
+ * Copyright © 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Jani Nikula <jani@nikula.org>
+ */
+
+#ifndef PARSE_TIME_DEBUG
+#define NDEBUG /* for assert() */
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "parse-time-string.h"
+
+/* Number of elements in array a. Only valid on real arrays, not pointers. */
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/*
+ * Field indices for the tm[] and set[] arrays in struct state.
+ *
+ * TM_ABS_* fields hold absolute date/time components, TM_REL_* fields
+ * hold amounts relative to now. next_field() and abs_to_rel_field()
+ * depend on the ordering of the enumerators.
+ */
+enum field {
+    /* keep SEC...YEAR in this order */
+    TM_ABS_SEC,		/* seconds */
+    TM_ABS_MIN,		/* minutes */
+    TM_ABS_HOUR,	/* hours */
+    TM_ABS_MDAY,	/* day of the month */
+    TM_ABS_MON,		/* month */
+    TM_ABS_YEAR,	/* year */
+
+    TM_ABS_WDAY,	/* day of the week. special: may be relative */
+    TM_ABS_ISDST,	/* daylight saving time */
+
+    TM_AMPM,		/* am vs. pm */
+    TM_TZ,		/* timezone in minutes */
+
+    /* keep SEC...YEAR in this order, mirroring the absolute fields above */
+    TM_REL_SEC,		/* seconds relative to now */
+    TM_REL_MIN,		/* minutes ... */
+    TM_REL_HOUR,	/* hours ... */
+    TM_REL_DAY,		/* days ... */
+    TM_REL_MON,		/* months ... */
+    TM_REL_YEAR,	/* years ... */
+    TM_REL_WEEK,	/* weeks ... */
+
+    TM_NONE,		/* not a field */
+
+    /* number of real fields; used to size the arrays in struct state */
+    TM_SIZE = TM_NONE,
+};
+
+/* Set status of a field in struct state. */
+enum field_set {
+    FIELD_UNSET,	/* field has not been set */
+    FIELD_SET,		/* field has been set or modified */
+    FIELD_NOW,		/* field is to be filled in from the reference time */
+};
+
+/*
+ * Return the absolute field following field in SEC...YEAR order, or
+ * TM_NONE for TM_ABS_YEAR and anything after it.
+ */
+static enum field
+next_field (enum field field)
+{
+    /* note: depends on the enum ordering */
+    if (field >= TM_ABS_YEAR)
+	return TM_NONE;
+
+    return field + 1;
+}
+
+/* Map an absolute field (SEC...YEAR) to the corresponding relative field. */
+static enum field
+abs_to_rel_field (enum field field)
+{
+    /* note: depends on the enum ordering */
+    const int offset = TM_REL_SEC - TM_ABS_SEC;
+
+    assert (field <= TM_ABS_YEAR);
+
+    return field + offset;
+}
+
+/*
+ * Get the "zero" value for field: 1 for day of month and month, 1970
+ * for year, and 0 for everything else.
+ */
+static int
+field_zero (enum field field)
+{
+    switch (field) {
+    case TM_ABS_MDAY:
+    case TM_ABS_MON:
+	return 1;
+    case TM_ABS_YEAR:
+	return 1970;
+    default:
+	return 0;
+    }
+}
+
+/* Parser state accumulated while scanning the input string. */
+struct state {
+    int tm[TM_SIZE];			/* parsed date and time */
+    enum field_set set[TM_SIZE];	/* set status of tm */
+
+    enum field last_field;	/* field most recently set or modified */
+    char delim;			/* last delimiter seen; cleared when a field is set */
+
+    int postponed_length;	/* number of digits in postponed value */
+    int postponed_value;	/* the postponed number itself */
+};
+
+/*
+ * Helpers for postponed numbers.
+ *
+ * postponed_length is the number of digits in postponed value. 0
+ * means there is no postponed number. -1 means there is a postponed
+ * number, but it comes from a keyword, and it doesn't have digits.
+ */
+/*
+ * Return the length of the postponed number without consuming it: 0
+ * if there is none, -1 if it came from a keyword.
+ */
+static int
+get_postponed_length (struct state *state)
+{
+    return state->postponed_length;
+}
+
+/*
+ * Consume the postponed number, if any. On success store its value in
+ * *v and its length in *n (either may be NULL), clear it from state,
+ * and return true. Return false if there is no postponed number.
+ */
+static bool
+get_postponed_number (struct state *state, int *v, int *n)
+{
+    const int length = state->postponed_length;
+
+    if (length == 0)
+	return false;
+
+    if (n != NULL)
+	*n = length;
+
+    if (v != NULL)
+	*v = state->postponed_value;
+
+    /* the number has been handed out, forget it */
+    state->postponed_length = 0;
+    state->postponed_value = 0;
+
+    return true;
+}
+
+/*
+ * Parse the postponed number if one exists. Returns 0 if there is
+ * none, otherwise the result of parse_postponed_number().
+ */
+static int parse_postponed_number (struct state *state, int v, int n);
+static int
+handle_postponed_number (struct state *state)
+{
+    int value, length;
+
+    if (state->postponed_length == 0)
+	return 0;
+
+    /* take the number out of state before parsing it */
+    value = state->postponed_value;
+    length = state->postponed_length;
+    state->postponed_value = 0;
+    state->postponed_length = 0;
+
+    return parse_postponed_number (state, value, length);
+}
+
+/*
+ * Store v as the new postponed number, to be handled later. A number
+ * already postponed is parsed first; if that fails, its error is
+ * returned and v is not stored. n is the number of digits in v, or -1
+ * for a keyword that has no number length.
+ */
+static int
+set_postponed_number (struct state *state, int v, int n)
+{
+    /* parse previous postponed number, if any */
+    int r = handle_postponed_number (state);
+
+    if (r == 0) {
+	state->postponed_length = n;
+	state->postponed_value = v;
+    }
+
+    return r;
+}
+
+/* Remember delim as the most recently seen delimiter. */
+static void
+set_delim (struct state *state, char delim)
+{
+    state->delim = delim;
+}
+
+/* Forget the stored delimiter (0 means no delimiter). */
+static void
+unset_delim (struct state *state)
+{
+    state->delim = 0;
+}
+
+/*
+ * Field set/get/mod helpers.
+ */
+
+/*
+ * Return true if field has a set status other than FIELD_UNSET.
+ * Returns unset (false) for non-tracked fields.
+ */
+static bool
+is_field_set (struct state *state, enum field field)
+{
+    assert (field < ARRAY_SIZE (state->tm));
+
+    if (field >= ARRAY_SIZE (state->set))
+	return false;
+
+    return state->set[field] != FIELD_UNSET;
+}
+
+/* Clear field: zero its value and mark it FIELD_UNSET. */
+static void
+unset_field (struct state *state, enum field field)
+{
+    assert (field < ARRAY_SIZE (state->tm));
+
+    state->tm[field] = 0;
+    state->set[field] = FIELD_UNSET;
+}
+
+/*
+ * Set field to value.
+ *
+ * Returns -PARSE_TIME_ERR_ALREADYSET if the field has already been
+ * set, the error from parsing a pending postponed number if that
+ * fails, and 0 on success. On success the stored delimiter is cleared
+ * and field is recorded as the last field.
+ */
+static int
+set_field (struct state *state, enum field field, int value)
+{
+    int r;
+
+    assert (field < ARRAY_SIZE (state->tm));
+
+    /* some fields can only be set once */
+    if (field < ARRAY_SIZE (state->set) && state->set[field] != FIELD_UNSET)
+	return -PARSE_TIME_ERR_ALREADYSET;
+
+    /*
+     * The field is marked set before the postponed number is handled,
+     * so parse_postponed_number() already sees it as set. last_field
+     * still refers to the previous field at that point.
+     */
+    state->set[field] = FIELD_SET;
+
+    /*
+     * REVISIT: There could be a "next_field" that would be set from
+     * "field" for the duration of the handle_postponed_number() call,
+     * so it has more information to work with.
+     */
+
+    /* parse postponed number, if any */
+    r = handle_postponed_number (state);
+    if (r)
+	return r;
+
+    unset_delim (state);
+
+    state->tm[field] = value;
+    state->last_field = field;
+
+    return 0;
+}
+
+/*
+ * Mark the n fields listed in fields as FIELD_NOW, i.e. to be set to
+ * the current date/time in the specified time zone, or local timezone
+ * if not specified. The actual values are filled in after parsing is
+ * complete and the timezone is known.
+ *
+ * Returns 0 on success, or the first error from set_field().
+ */
+static int
+set_fields_to_now (struct state *state, enum field *fields, size_t n)
+{
+    size_t i = 0;
+
+    while (i < n) {
+	enum field f = fields[i++];
+	int r = set_field (state, f, 0);
+
+	if (r)
+	    return r;
+
+	/* set_field() marked it FIELD_SET; override with FIELD_NOW */
+	state->set[f] = FIELD_NOW;
+    }
+
+    return 0;
+}
+
+/*
+ * Modify field by adding value to it. To be used on relative fields.
+ *
+ * Unlike set_field(), this may be called repeatedly on the same
+ * field. Returns 0 on success, or the error from parsing a pending
+ * postponed number. On success the stored delimiter is cleared and
+ * field is recorded as the last field.
+ */
+static int
+mod_field (struct state *state, enum field field, int value)
+{
+    int r;
+
+    assert (field < ARRAY_SIZE (state->tm));   /* assert relative??? */
+
+    /* marked set before the postponed number is handled */
+    if (field < ARRAY_SIZE (state->set))
+	state->set[field] = FIELD_SET;
+
+    /* parse postponed number, if any */
+    r = handle_postponed_number (state);
+    if (r)
+	return r;
+
+    unset_delim (state);
+
+    state->tm[field] += value;
+    state->last_field = field;
+
+    return 0;
+}
+
+/*
+ * Get field value. Make sure the field is set before query. It's most
+ * likely an error to call this while parsing (for example fields set
+ * as FIELD_NOW will only be set to some value after parsing).
+ *
+ * The set status is not checked here; the stored value is returned
+ * as is.
+ */
+static int
+get_field (struct state *state, enum field field)
+{
+    assert (field < ARRAY_SIZE (state->tm));
+
+    return state->tm[field];
+}
+
+/* Unset indicator for time and date set helpers. */
+#define UNSET -1
+
+/*
+ * Time set helper. Sets hour, minute and second, in that order,
+ * skipping any that are UNSET (-1). No input checking. Returns 0 on
+ * success, or the first error from set_field().
+ */
+static int
+set_abs_time (struct state *state, int hour, int min, int sec)
+{
+    const enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
+    const int values[] = { hour, min, sec };
+    size_t i;
+
+    for (i = 0; i < ARRAY_SIZE (fields); i++) {
+	int r;
+
+	if (values[i] == UNSET)
+	    continue;
+
+	r = set_field (state, fields[i], values[i]);
+	if (r)
+	    return r;
+    }
+
+    return 0;
+}
+
+/*
+ * Date set helper. Sets year, month and day of month, in that order,
+ * skipping any that are UNSET (-1). No input checking. Returns 0 on
+ * success, or the first error from set_field().
+ */
+static int
+set_abs_date (struct state *state, int year, int mon, int mday)
+{
+    const enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
+    const int values[] = { year, mon, mday };
+    size_t i;
+
+    for (i = 0; i < ARRAY_SIZE (fields); i++) {
+	int r;
+
+	if (values[i] == UNSET)
+	    continue;
+
+	r = set_field (state, fields[i], values[i]);
+	if (r)
+	    return r;
+    }
+
+    return 0;
+}
+
+/*
+ * Keyword parsing and handling.
+ */
+struct keyword;
+/* Keyword setter callback. Returns 0 on success, < 0 on error. */
+typedef int (*setter_t)(struct state *state, struct keyword *kw);
+
+/* One entry of the keyword table. */
+struct keyword {
+    const char *name;	/* keyword */
+    size_t minlen;	/* min length to match, 0 = must match all */
+    enum field field;	/* field to set, or TM_NONE if N/A */
+    int value;		/* value to set, or 0 if N/A */
+    setter_t set;	/* function to use for setting, if non-NULL */
+};
+
+/*
+ * Setter callback functions for keywords.
+ */
+/* Default setter: set the keyword's field to the keyword's value. */
+static int
+kw_set_default (struct state *state, struct keyword *kw)
+{
+    return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for relative (duration) keywords. A postponed number, if
+ * any, is consumed and used as the multiplier for the keyword's
+ * value; without one the multiplier is 1. The result is accumulated
+ * into the keyword's field.
+ */
+static int
+kw_set_rel (struct state *state, struct keyword *kw)
+{
+    int count;
+
+    /* get a previously set multiplier, if any */
+    if (!get_postponed_number (state, &count, NULL))
+	count = 1;
+
+    /* accumulate relative field values */
+    return mod_field (state, kw->field, count * kw->value);
+}
+
+/* Setter for number words: postpone the keyword's value as a number. */
+static int
+kw_set_number (struct state *state, struct keyword *kw)
+{
+    /* -1 = no length, from keyword */
+    return set_postponed_number (state, kw->value, -1);
+}
+
+/*
+ * Setter for month names. A postponed one or two digit number is
+ * consumed as the day of month (and must be 1...31) before the month
+ * itself is set.
+ */
+static int
+kw_set_month (struct state *state, struct keyword *kw)
+{
+    int r, mday;
+
+    switch (get_postponed_length (state)) {
+    case 1:
+    case 2:
+	/* consume postponed number, it could be mday */
+	get_postponed_number (state, &mday, NULL);
+
+	if (mday < 1 || mday > 31)
+	    return -PARSE_TIME_ERR_INVALIDDATE;
+
+	r = set_field (state, TM_ABS_MDAY, mday);
+	if (r)
+	    return r;
+	break;
+    default:
+	break;
+    }
+
+    return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for am/pm. A postponed one or two digit number is consumed
+ * as the hour (and must be 1...12), with minutes and seconds set to
+ * zero, before the am/pm field itself is set.
+ */
+static int
+kw_set_ampm (struct state *state, struct keyword *kw)
+{
+    const int digits = get_postponed_length (state);
+    const bool hour_pending = (digits == 1 || digits == 2);
+
+    /* consume postponed number if it could be hour */
+    if (hour_pending) {
+	int r, hour;
+
+	get_postponed_number (state, &hour, NULL);
+
+	if (hour < 1 || hour > 12)
+	    return -PARSE_TIME_ERR_INVALIDTIME;
+
+	r = set_abs_time (state, hour, 0, 0);
+	if (r)
+	    return r;
+    }
+
+    return set_field (state, kw->field, kw->value);
+}
+
+/*
+ * Setter for named times of day: the keyword's value is the hour,
+ * minutes and seconds are set to zero.
+ */
+static int
+kw_set_timeofday (struct state *state, struct keyword *kw)
+{
+    return set_abs_time (state, kw->value, 0, 0);
+}
+
+/* Setter for "today": mark year, month and day of month as FIELD_NOW. */
+static int
+kw_set_today (struct state *state, struct keyword *kw)
+{
+    enum field date_fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
+
+    (void) kw; /* unused, signature fixed by setter_t */
+
+    return set_fields_to_now (state, date_fields, ARRAY_SIZE (date_fields));
+}
+
+/* Setter for "now": mark hour, minute and second as FIELD_NOW. */
+static int
+kw_set_now (struct state *state, struct keyword *kw)
+{
+    enum field time_fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
+
+    (void) kw; /* unused, signature fixed by setter_t */
+
+    return set_fields_to_now (state, time_fields, ARRAY_SIZE (time_fields));
+}
+
+/*
+ * Setter for ordinal suffixes (st, nd, rd, th). Requires a postponed
+ * one or two digit number, which is consumed and set as the day of
+ * month if it agrees with the suffix.
+ */
+static int
+kw_set_ordinal (struct state *state, struct keyword *kw)
+{
+    const char *suffix = kw->name;
+    int digits, mday;
+    bool ok = true;
+
+    /* require a postponed number */
+    if (!get_postponed_number (state, &mday, &digits))
+	return -PARSE_TIME_ERR_DATEFORMAT;
+
+    /* ordinals are mday */
+    if (digits != 1 && digits != 2)
+	return -PARSE_TIME_ERR_DATEFORMAT;
+
+    /* be strict about st, nd, rd, and lax about th */
+    if (strcasecmp (suffix, "st") == 0)
+	ok = (mday == 1 || mday == 21 || mday == 31);
+    else if (strcasecmp (suffix, "nd") == 0)
+	ok = (mday == 2 || mday == 22);
+    else if (strcasecmp (suffix, "rd") == 0)
+	ok = (mday == 3 || mday == 23);
+    else if (strcasecmp (suffix, "th") == 0)
+	ok = (mday >= 1 && mday <= 31);
+
+    if (!ok)
+	return -PARSE_TIME_ERR_INVALIDDATE;
+
+    return set_field (state, TM_ABS_MDAY, mday);
+}
+
+/*
+ * Accepted keywords.
+ *
+ * Columns: name, minimum match length (0 = whole keyword), field,
+ * value, setter.
+ *
+ * If keyword begins with upper case letter, then the matching will be
+ * case sensitive. Otherwise the matching is case insensitive.
+ *
+ * If setter is NULL, kw_set_default will be used.
+ *
+ * Note: Order matters. Matching is greedy, longest match is used, but
+ * of equal length matches the first one is used.
+ */
+static struct keyword keywords[] = {
+    /* weekdays */
+    { "sunday",		3,	TM_ABS_WDAY,	0,	NULL },
+    { "monday",		3,	TM_ABS_WDAY,	1,	NULL },
+    { "tuesday",	3,	TM_ABS_WDAY,	2,	NULL },
+    { "wednesday",	3,	TM_ABS_WDAY,	3,	NULL },
+    { "thursday",	3,	TM_ABS_WDAY,	4,	NULL },
+    { "friday",		3,	TM_ABS_WDAY,	5,	NULL },
+    { "saturday",	3,	TM_ABS_WDAY,	6,	NULL },
+
+    /* months */
+    { "january",	3,	TM_ABS_MON,	1,	kw_set_month },
+    { "february",	3,	TM_ABS_MON,	2,	kw_set_month },
+    { "march",		3,	TM_ABS_MON,	3,	kw_set_month },
+    { "april",		3,	TM_ABS_MON,	4,	kw_set_month },
+    { "may",		3,	TM_ABS_MON,	5,	kw_set_month },
+    { "june",		3,	TM_ABS_MON,	6,	kw_set_month },
+    { "july",		3,	TM_ABS_MON,	7,	kw_set_month },
+    { "august",		3,	TM_ABS_MON,	8,	kw_set_month },
+    { "september",	3,	TM_ABS_MON,	9,	kw_set_month },
+    { "october",	3,	TM_ABS_MON,	10,	kw_set_month },
+    { "november",	3,	TM_ABS_MON,	11,	kw_set_month },
+    { "december",	3,	TM_ABS_MON,	12,	kw_set_month },
+
+    /* durations */
+    { "years",		1,	TM_REL_YEAR,	1,	kw_set_rel },
+    { "weeks",		1,	TM_REL_WEEK,	1,	kw_set_rel },
+    { "days",		1,	TM_REL_DAY,	1,	kw_set_rel },
+    { "hours",		1,	TM_REL_HOUR,	1,	kw_set_rel },
+    { "hrs",		1,	TM_REL_HOUR,	1,	kw_set_rel },
+    /* M=months, m=minutes. single M must precede minutes in the list. */
+    { "M",		1,	TM_REL_MON,	1,	kw_set_rel },
+    { "minutes",	1,	TM_REL_MIN,	1,	kw_set_rel },
+    { "mins",		1,	TM_REL_MIN,	1,	kw_set_rel },
+    { "months",		1,	TM_REL_MON,	1,	kw_set_rel },
+    { "seconds",	1,	TM_REL_SEC,	1,	kw_set_rel },
+    { "secs",		1,	TM_REL_SEC,	1,	kw_set_rel },
+
+    /* numbers */
+    { "one",		0,	TM_NONE,	1,	kw_set_number },
+    { "two",		0,	TM_NONE,	2,	kw_set_number },
+    { "three",		0,	TM_NONE,	3,	kw_set_number },
+    { "four",		0,	TM_NONE,	4,	kw_set_number },
+    { "five",		0,	TM_NONE,	5,	kw_set_number },
+    { "six",		0,	TM_NONE,	6,	kw_set_number },
+    { "seven",		0,	TM_NONE,	7,	kw_set_number },
+    { "eight",		0,	TM_NONE,	8,	kw_set_number },
+    { "nine",		0,	TM_NONE,	9,	kw_set_number },
+    { "ten",		0,	TM_NONE,	10,	kw_set_number },
+    { "dozen",		0,	TM_NONE,	12,	kw_set_number },
+    { "hundred",	0,	TM_NONE,	100,	kw_set_number },
+
+    /* special number forms: postponed as the numbers 0 and 1 */
+    { "this",		0,	TM_NONE,	0,	kw_set_number },
+    { "last",		0,	TM_NONE,	1,	kw_set_number },
+
+    /* specials */
+    { "yesterday",	0,	TM_REL_DAY,	1,	kw_set_rel },
+    { "today",		0,	TM_NONE,	0,	kw_set_today },
+    { "now",		0,	TM_NONE,	0,	kw_set_now },
+    { "noon",		0,	TM_NONE,	12,	kw_set_timeofday },
+    { "midnight",	0,	TM_NONE,	0,	kw_set_timeofday },
+    { "am",		0,	TM_AMPM,	0,	kw_set_ampm },
+    { "a.m.",		0,	TM_AMPM,	0,	kw_set_ampm },
+    { "pm",		0,	TM_AMPM,	1,	kw_set_ampm },
+    { "p.m.",		0,	TM_AMPM,	1,	kw_set_ampm },
+    { "st",		0,	TM_NONE,	0,	kw_set_ordinal },
+    { "nd",		0,	TM_NONE,	0,	kw_set_ordinal },
+    { "rd",		0,	TM_NONE,	0,	kw_set_ordinal },
+    { "th",		0,	TM_NONE,	0,	kw_set_ordinal },
+
+    /* timezone codes: offset in minutes. FIXME: add more codes. */
+    { "pst",		0,	TM_TZ,		-8*60,	NULL },
+    { "mst",		0,	TM_TZ,		-7*60,	NULL },
+    { "cst",		0,	TM_TZ,		-6*60,	NULL },
+    { "est",		0,	TM_TZ,		-5*60,	NULL },
+    { "ast",		0,	TM_TZ,		-4*60,	NULL },
+    { "nst",		0,	TM_TZ,		-(3*60+30),	NULL },
+
+    { "gmt",		0,	TM_TZ,		0,	NULL },
+    { "utc",		0,	TM_TZ,		0,	NULL },
+
+    { "wet",		0,	TM_TZ,		0,	NULL },
+    { "cet",		0,	TM_TZ,		1*60,	NULL },
+    { "eet",		0,	TM_TZ,		2*60,	NULL },
+    { "fet",		0,	TM_TZ,		3*60,	NULL },
+
+    { "wat",		0,	TM_TZ,		1*60,	NULL },
+    { "cat",		0,	TM_TZ,		2*60,	NULL },
+    { "eat",		0,	TM_TZ,		3*60,	NULL },
+};
+
+/*
+ * Compare the beginnings of s and keyword. Returns the number of
+ * matching leading chars on match, 0 for no match.
+ *
+ * With n != 0, at least n leading chars must match. With n == 0, all
+ * of keyword must match. Matching is case sensitive if match_case is
+ * true, case insensitive otherwise.
+ */
+static size_t
+stringcmp (const char *s, const char *keyword, size_t n, bool match_case)
+{
+    size_t matched = 0;
+
+    while (s[matched] && keyword[matched]) {
+	unsigned char a = (unsigned char) s[matched];
+	unsigned char b = (unsigned char) keyword[matched];
+
+	if (match_case ? a != b : tolower (a) != tolower (b))
+	    break;
+
+	matched++;
+    }
+
+    /* minimum length given: any long enough common prefix will do */
+    if (n)
+	return matched >= n ? matched : 0;
+
+    /* otherwise the whole keyword must have been consumed */
+    return keyword[matched] ? 0 : matched;
+}
+
+/*
+ * Parse a keyword at the beginning of s. The longest matching entry
+ * in keywords[] is used; of equally long matches the first one wins.
+ *
+ * Returns < 0 on error (-PARSE_TIME_ERR_KEYWORD if nothing matches,
+ * or the error from the keyword's setter), number of parsed chars on
+ * success.
+ */
+static ssize_t
+parse_keyword (struct state *state, const char *s)
+{
+    size_t i;
+    size_t n, max_n = 0;
+    struct keyword *kw = NULL;
+    int r;
+
+    /* Match longest keyword */
+    for (i = 0; i < ARRAY_SIZE (keywords); i++) {
+	/* Match case if keyword begins with upper case letter. */
+	bool mcase = isupper ((unsigned char) keywords[i].name[0]);
+
+	n = stringcmp (s, keywords[i].name, keywords[i].minlen, mcase);
+	if (n > max_n) {
+	    max_n = n;
+	    kw = &keywords[i];
+	}
+    }
+
+    if (!kw)
+	return -PARSE_TIME_ERR_KEYWORD;
+
+    if (kw->set)
+	r = kw->set (state, kw);
+    else
+	r = kw_set_default (state, kw);
+
+    /*
+     * Return the error separately: "r < 0 ? r : max_n" would convert
+     * the negative int to size_t before converting it to ssize_t.
+     */
+    if (r < 0)
+	return r;
+
+    return (ssize_t) max_n;
+}
+
+/*
+ * Non-keyword parsers and their helpers.
+ */
+
+/*
+ * Set the time zone from a user specified +/-HH:MM offset.
+ *
+ * sign is '+' or '-'. hour must be 0...14 and min must be 0, 15, 30
+ * or 45. Returns -PARSE_TIME_ERR_INVALIDTIME for out of range values,
+ * otherwise the result of setting TM_TZ (offset in minutes).
+ */
+static int
+set_user_tz (struct state *state, char sign, int hour, int min)
+{
+    int tz;
+
+    assert (sign == '+' || sign == '-');
+
+    /* min > 59: a limit of 60 would pass, because 60 % 15 == 0 */
+    if (hour < 0 || hour > 14 || min < 0 || min > 59 || min % 15)
+	return -PARSE_TIME_ERR_INVALIDTIME;
+
+    tz = hour * 60 + min;
+    if (sign == '-')
+	tz = -tz;
+
+    return set_field (state, TM_TZ, tz);
+}
+
+/*
+ * Independent parsing of a postponed number when it wasn't consumed
+ * during parsing of the following token.
+ *
+ * This should be able to trust that last_field and next_field are
+ * right.
+ *
+ * v is the number and n its length in digits (-1 if it came from a
+ * keyword). Only two cases are handled: a one or two digit number
+ * directly after a month becomes the day of month, and a four digit
+ * number becomes the year if no year is set. Returns the result of
+ * set_field() in those cases, -PARSE_TIME_ERR_FORMAT otherwise.
+ */
+static int
+parse_postponed_number (struct state *state, int v, int n)
+{
+    /*
+     * alright, these are really lone, won't affect parsing of
+     * following items... it's not a multiplier, those have been eaten
+     * away.
+     *
+     * also note numbers eaten away by parse_single_number.
+     */
+
+    assert (n < 8);
+
+    switch (n) {
+    case 1:
+    case 2:
+	/* hour or mday or year */
+	if (state->last_field == TM_ABS_MON &&  /* FIXME: written mon! */
+	    !is_field_set (state, TM_ABS_MDAY)) {
+	    return set_field (state, TM_ABS_MDAY, v);
+	}
+	break;
+    case 4:
+	/* YYYY or +/-HHMM for TZ or HHMM or DDMM */
+	/* FIXME: state->delim is no longer right for this function!
+	 * why not, it could be! */
+	if (!is_field_set (state, TM_ABS_YEAR)) {
+	    /* FIXME: check year? */
+	    return set_field (state, TM_ABS_YEAR, v);
+	}
+	break;
+    case 6:
+	/* FIXME: HHMMSS or DDMMYY */
+	break;
+    case -1:
+	/* REVISIT */
+	break;
+    case 3:
+    case 5:
+    case 7:
+    default:
+	break;
+    }
+
+    /* nothing above could make sense of the number */
+    return -PARSE_TIME_ERR_FORMAT;
+}
+
+/*
+ * Parse a single number v with n digits. An eight digit number is
+ * parsed immediately as YYYYMMDD, longer numbers are rejected, and
+ * anything shorter is postponed to be parsed later.
+ */
+static int
+parse_single_number (struct state *state, unsigned long v,
+		     unsigned long n)
+{
+    assert (n);
+
+    if (n > 8) {
+	/* FIXME: seconds since epoch */
+	return -PARSE_TIME_ERR_FORMAT;
+    }
+
+    /* parse things that can be parsed immediately */
+    if (n == 8) {
+	/* YYYYMMDD */
+	const int year = v / 10000;
+	const int mon = (v / 100) % 100;
+	const int mday = v % 100;
+	const bool valid = year >= 1970 && mon >= 1 && mon <= 12 &&
+			   mday >= 1 && mday <= 31;
+
+	if (!valid)
+	    return -PARSE_TIME_ERR_INVALIDDATE;
+
+	return set_abs_date (state, year, mon, mday);
+    }
+
+    if (v > INT_MAX)
+	return -PARSE_TIME_ERR_FORMAT;
+
+    return set_postponed_number (state, v, n);
+}
+
+/* Return true if c separates the numbers of a time (':'). */
+static bool
+is_time_sep (char c)
+{
+    switch (c) {
+    case ':':
+	return true;
+    default:
+	return false;
+    }
+}
+
+/* Return true if c separates the numbers of a date ('/', '-' or '.'). */
+static bool
+is_date_sep (char c)
+{
+    switch (c) {
+    case '/':
+    case '-':
+    case '.':
+	return true;
+    default:
+	return false;
+    }
+}
+
+/* Return true if c is either a date or a time separator. */
+static bool
+is_sep (char c)
+{
+    if (is_date_sep (c))
+	return true;
+
+    return is_time_sep (c);
+}
+
+/*
+ * Expand year given with n digits. A two-digit year 00...69 is in the
+ * 2000s and 70...99 in the 1900s, a four-digit year is returned as
+ * is. For any other n (including 0, no year given) return UNSET.
+ */
+static int
+expand_year (unsigned long year, size_t n)
+{
+    switch (n) {
+    case 2:
+	return (year < 70 ? 2000 : 1900) + year;
+    case 4:
+	return year;
+    default:
+	return UNSET;
+    }
+}
+
+/*
+ * Parse a group of two or three numbers separated by the date
+ * separator sep. The accepted orderings depend on the separator, see
+ * the cases below.
+ *
+ * v1...v3 are the numbers and n1...n3 their lengths in digits, with
+ * n3 == 0 if there is no third number. Returns
+ * -PARSE_TIME_ERR_DATEFORMAT if the lengths fit no accepted format,
+ * -PARSE_TIME_ERR_INVALIDDATE if a value is out of range, otherwise
+ * the result of set_abs_date().
+ */
+static int
+parse_date (struct state *state, char sep,
+	    unsigned long v1, unsigned long v2, unsigned long v3,
+	    size_t n1, size_t n2, size_t n3)
+{
+    int year = UNSET, mon = UNSET, mday = UNSET;
+
+    assert (is_date_sep (sep));
+
+    switch (sep) {
+    case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
+	if (n1 != 1 && n1 != 2)
+	    return -PARSE_TIME_ERR_DATEFORMAT;
+
+	if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
+	    /* M[M]/D[D][/YY[YY]] */
+	    year = expand_year (v3, n3);
+	    mon = v1;
+	    mday = v2;
+	} else if (n2 == 4 && n3 == 0) {
+	    /* M[M]/YYYY */
+	    year = v2;
+	    mon = v1;
+	} else {
+	    return -PARSE_TIME_ERR_DATEFORMAT;
+	}
+	break;
+
+    case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
+	if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
+	    /* YYYY-MM[-DD] */
+	    year = v1;
+	    mon = v2;
+	    if (n3)
+		mday = v3;
+	} else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
+	    /* DD-MM[-YY[YY]] */
+	    year = expand_year (v3, n3);
+	    mon = v2;
+	    mday = v1;
+	} else if (n1 == 2 && n2 == 4 && n3 == 0) {
+	    /* MM-YYYY */
+	    year = v2;
+	    mon = v1;
+	} else {
+	    return -PARSE_TIME_ERR_DATEFORMAT;
+	}
+	break;
+
+    case '.': /* Date: D[D].M[M][.[YY[YY]]] */
+	if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
+	    (n3 != 0 && n3 != 2 && n3 != 4))
+	    return -PARSE_TIME_ERR_DATEFORMAT;
+
+	year = expand_year (v3, n3);
+	mon = v2;
+	mday = v1;
+	break;
+    }
+
+    /* range checks apply only to the parts that were given */
+    if (year != UNSET && year < 1970)
+	return -PARSE_TIME_ERR_INVALIDDATE;
+
+    if (mon != UNSET && (mon < 1 || mon > 12))
+	return -PARSE_TIME_ERR_INVALIDDATE;
+
+    if (mday != UNSET && (mday < 1 || mday > 31))
+	return -PARSE_TIME_ERR_INVALIDDATE;
+
+    return set_abs_date (state, year, mon, mday);
+}
+
+/*
+ * Parse a group of two or three numbers separated by the time
+ * separator: H[H]:MM[:SS]. If hour and minute have already been set
+ * and the group is HH:MM following a '+' or '-' delimiter, it is
+ * parsed as a time zone offset instead.
+ *
+ * v1...v3 are the numbers and n1...n3 their lengths in digits, with
+ * n3 == 0 if there is no third number. Returns
+ * -PARSE_TIME_ERR_TIMEFORMAT if the lengths do not fit,
+ * -PARSE_TIME_ERR_INVALIDTIME if a value is out of range, otherwise
+ * the result of setting the time or time zone.
+ */
+static int
+parse_time (struct state *state, char sep,
+	    unsigned long v1, unsigned long v2, unsigned long v3,
+	    size_t n1, size_t n2, size_t n3)
+{
+    assert (is_time_sep (sep));
+
+    if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
+	return -PARSE_TIME_ERR_TIMEFORMAT;
+
+    /*
+     * REVISIT: this means it's required to set time *before* being
+     * able to set timezone
+     */
+    if (is_field_set (state, TM_ABS_HOUR) &&
+	is_field_set (state, TM_ABS_MIN) &&
+	n1 == 2 && n2 == 2 && n3 == 0 &&
+	(state->delim == '+' || state->delim == '-')) {
+	return set_user_tz (state, state->delim, v1, v2);
+    }
+
+    /*
+     * Minute 60 is never valid. Hour 24 and second 60 remain accepted
+     * as before (presumably for end of day and leap seconds).
+     */
+    if (v1 > 24 || v2 > 59 || v3 > 60)
+	return -PARSE_TIME_ERR_INVALIDTIME;
+
+    return set_abs_time (state, v1, v2, n3 ? v3 : 0);
+}
+
+/*
+ * strtoul helper for base 10 that also assigns length: *endp is set
+ * to the first char not parsed and *len to the number of chars
+ * parsed.
+ */
+static unsigned long
+strtoul_len (const char *s, const char **endp, size_t *len)
+{
+    char *end;
+    unsigned long val;
+
+    val = strtoul (s, &end, 10);
+    *endp = end;
+    *len = end - s;
+
+    return val;
+}
+
+/*
+ * Parse a number, or a group of two or three numbers joined by a date
+ * or time separator. A single number goes to parse_single_number(),
+ * a group to parse_time() or parse_date() depending on the separator.
+ *
+ * Returns < 0 on error, number of parsed chars on success.
+ */
+static ssize_t
+parse_number (struct state *state, const char *s)
+{
+    unsigned long v1, v2, v3 = 0;
+    size_t n1, n2, n3 = 0;
+    const char *p = s;
+    char sep;
+    int r;
+
+    v1 = strtoul_len (p, &p, &n1);
+
+    /* no separator and digit following: a single number */
+    if (!is_sep (*p) || !isdigit ((unsigned char) p[1])) {
+	r = parse_single_number (state, v1, n1);
+	if (r)
+	    return r;
+
+	return p - s;
+    }
+
+    sep = *p;
+    v2 = strtoul_len (p + 1, &p, &n2);
+
+    /* a third number belongs to the group only with the same separator */
+    if (*p == sep && isdigit ((unsigned char) p[1]))
+	v3 = strtoul_len (p + 1, &p, &n3);
+
+    r = is_time_sep (sep) ?
+	parse_time (state, sep, v1, v2, v3, n1, n2, n3) :
+	parse_date (state, sep, v1, v2, v3, n1, n2, n3);
+    if (r)
+	return r;
+
+    return p - s;
+}
+
+/*
+ * Parse delimiter(s): skip everything up to the next letter or digit,
+ * and store the last skipped char as the delimiter for further
+ * processing. Returns the number of parsed chars.
+ */
+static ssize_t
+parse_delim (struct state *state, const char *s)
+{
+    const char *p;
+
+    /*
+     * REVISIT: any actions depending on the first delim after last
+     * field? what could it be?
+     */
+
+    for (p = s; *p != '\0' && !isalnum ((unsigned char) *p); p++)
+	set_delim (state, *p);
+
+    return p - s;
+}
+
+/*
+ * Parse a date/time string: dispatch each token to the keyword,
+ * number or delimiter parser according to its first char, and finally
+ * parse any number still postponed.
+ *
+ * Returns < 0 on error, number of parsed chars on success.
+ */
+static ssize_t
+parse_input (struct state *state, const char *s)
+{
+    const char *p;
+    ssize_t n;
+    int r;
+
+    for (p = s; *p; p += n) {
+	const unsigned char c = (unsigned char) *p;
+
+	if (isalpha (c))
+	    n = parse_keyword (state, p);
+	else if (isdigit (c))
+	    n = parse_number (state, p);
+	else
+	    n = parse_delim (state, p);
+
+	/* a parser that consumed nothing would loop forever */
+	if (n == 0)
+	    return -PARSE_TIME_ERR;
+
+	if (n < 0)
+	    return n;             /* FIXME */
+    }
+
+    /* parse postponed number, if any */
+    r = handle_postponed_number (state);
+    if (r < 0)
+	return r;
+
+    return p - s;
+}
+
+/*
+ * Processing the parsed input.
+ */
+
+/*
+ * Initialize reference time to tm. Use now for reference time if
+ * non-NULL, otherwise current time. If a time zone has been set in
+ * state, tm is broken down in that zone, otherwise in local time.
+ *
+ * Returns 0 on success, -PARSE_TIME_ERR_LIB if a library call fails.
+ */
+static int
+initialize_now (struct state *state, struct tm *tm, const time_t *now)
+{
+    time_t t;
+
+    if (now != NULL)
+	t = *now;
+    else if (time (&t) == (time_t) -1)
+	return -PARSE_TIME_ERR_LIB;
+
+    if (!is_field_set (state, TM_TZ)) {
+	/* local time */
+	return localtime_r (&t, tm) ? 0 : -PARSE_TIME_ERR_LIB;
+    }
+
+    /* some other time zone: adjust now according to the TZ */
+    t += get_field (state, TM_TZ) * 60;
+
+    /* it's not gm, but this doesn't mess with the tz */
+    return gmtime_r (&t, tm) ? 0 : -PARSE_TIME_ERR_LIB;
+}
+
+/*
+ * Normalize tm according to mktime(3). Both mktime(3) and
+ * localtime_r(3) use local time, but they cancel each other out here,
+ * making this function agnostic to time zone.
+ *
+ * Returns 0 on success, -PARSE_TIME_ERR_LIB if either call fails.
+ */
+static int
+normalize_tm (struct tm *tm)
+{
+    time_t t;
+
+    t = mktime (tm);
+    if (t == (time_t) -1 || localtime_r (&t, tm) == NULL)
+	return -PARSE_TIME_ERR_LIB;
+
+    return 0;
+}
+
+/*
+ * Get field out of a struct tm, converted to the conventions used in
+ * struct state: month is 1-based and year is the full year. Only the
+ * TM_ABS_SEC...TM_ABS_ISDST fields are supported; 0 is returned for
+ * anything else.
+ */
+static int
+tm_get_field (const struct tm *tm, enum field field)
+{
+    int value = 0;
+
+    switch (field) {
+    case TM_ABS_SEC:
+	value = tm->tm_sec;
+	break;
+    case TM_ABS_MIN:
+	value = tm->tm_min;
+	break;
+    case TM_ABS_HOUR:
+	value = tm->tm_hour;
+	break;
+    case TM_ABS_MDAY:
+	value = tm->tm_mday;
+	break;
+    case TM_ABS_MON:
+	value = tm->tm_mon + 1; /* 0- to 1-based */
+	break;
+    case TM_ABS_YEAR:
+	value = 1900 + tm->tm_year;
+	break;
+    case TM_ABS_WDAY:
+	value = tm->tm_wday;
+	break;
+    case TM_ABS_ISDST:
+	value = tm->tm_isdst;
+	break;
+    default:
+	assert (false);
+	break;
+    }
+
+    return value;
+}
+
+/*
+ * Modify hour according to am/pm setting. The absolute hour is left
+ * as is; the correction is applied through the relative hour field.
+ *
+ * Returns 0 if am/pm is not set or on success,
+ * -PARSE_TIME_ERR_TIMEFORMAT if am/pm is set without an hour,
+ * -PARSE_TIME_ERR_INVALIDTIME if the hour is not 1...12, or the error
+ * from mod_field().
+ */
+static int
+fixup_ampm (struct state *state)
+{
+    int hour, hdiff = 0;
+
+    if (!is_field_set (state, TM_AMPM))
+	return 0;
+
+    if (!is_field_set (state, TM_ABS_HOUR))
+	return -PARSE_TIME_ERR_TIMEFORMAT;
+
+    hour = get_field (state, TM_ABS_HOUR);
+    if (hour < 1 || hour > 12)
+	return -PARSE_TIME_ERR_INVALIDTIME;
+
+    if (get_field (state, TM_AMPM)) {
+	/* 12pm is noon */
+	if (hour != 12)
+	    hdiff = 12;
+    } else {
+	/* 12am is midnight, beginning of day */
+	if (hour == 12)
+	    hdiff = -12;
+    }
+
+    /*
+     * NOTE(review): relative fields appear to count into the past
+     * (cf. "yesterday" = TM_REL_DAY 1), hence the negation - confirm.
+     * The result of mod_field() is propagated instead of dropped.
+     */
+    return mod_field (state, TM_REL_HOUR, -hdiff);
+}
+
+/*
+ * Combine absolute and relative fields, and round.
+ *
+ * Resolves the parsed fields in 'state' against the reference time
+ * 'tnow' (handed to initialize_now()), applies the rounding requested
+ * by 'round' (PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN or
+ * PARSE_TIME_ROUND_UP), and converts the result with mktime(3).
+ *
+ * Returns 0 and stores the result in '*t_out' on success. Returns a
+ * non-zero error code on failure, in which case '*t_out' is left
+ * untouched.
+ */
+static int
+create_output (struct state *state, time_t *t_out, const time_t *tnow,
+	       int round)
+{
+    struct tm tm = { 0 };
+    struct tm now;
+    time_t result;
+    enum field f;
+    int r;
+    int week_round = PARSE_TIME_NO_ROUND;
+
+    r = initialize_now (state, &now, tnow);
+    if (r)
+	return r;
+
+    /* initialize uninitialized fields to now */
+    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
+	if (state->set[f] == FIELD_NOW) {
+	    state->tm[f] = tm_get_field (&now, f);
+	    state->set[f] = FIELD_SET;
+	}
+    }
+
+    /*
+     * If MON is set but YEAR is not, refer to past month.
+     *
+     * REVISIT: Why are month/week special in this regard? What about
+     * mday, or time. Should refer to past.
+     */
+    if (is_field_set (state, TM_ABS_MON) &&
+	!is_field_set (state, TM_ABS_YEAR)) {
+	if (get_field (state, TM_ABS_MON) >= tm_get_field (&now, TM_ABS_MON))
+	    mod_field (state, TM_REL_YEAR, 1);
+    }
+
+    /*
+     * If WDAY is set but MDAY is not, we consider WDAY relative
+     *
+     * REVISIT: This fails on stuff like "two months ago monday"
+     * because two months ago wasn't the same day as today. Postpone
+     * until we know date?
+     */
+    if (is_field_set (state, TM_ABS_WDAY) &&
+	!is_field_set (state, TM_ABS_MDAY)) {
+	int wday = get_field (state, TM_ABS_WDAY);
+	int today = tm_get_field (&now, TM_ABS_WDAY);
+	int rel_days;
+
+	/* naming today's weekday yields 7, i.e. one week ago */
+	if (today > wday)
+	    rel_days = today - wday;
+	else
+	    rel_days = today + 7 - wday;
+
+	/* this also prevents special week rounding from happening */
+	mod_field (state, TM_REL_DAY, rel_days);
+
+	unset_field (state, TM_ABS_WDAY);
+    }
+
+    r = fixup_ampm (state);
+    if (r)
+	return r;
+
+    /*
+     * Iterate fields starting from seconds, i.e. from most accurate
+     * towards least accurate, and set unset fields according to
+     * requested rounding.
+     */
+    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
+	if (round != PARSE_TIME_NO_ROUND) {
+	    enum field rel = abs_to_rel_field (f);
+
+	    if (is_field_set (state, f) || is_field_set (state, rel)) {
+		/*
+		 * Relative fields are subtracted when tm is filled in
+		 * below, so -1 presumably bumps this unit up by one.
+		 */
+		if (round >= PARSE_TIME_ROUND_UP)
+		    mod_field (state, rel, -1);
+		round = PARSE_TIME_NO_ROUND; /* no more rounding */
+	    } else {
+		if (f == TM_ABS_MDAY &&
+		    is_field_set (state, TM_REL_WEEK)) {
+		    /* week is most accurate */
+		    week_round = round;
+		    round = PARSE_TIME_NO_ROUND;
+		} else {
+		    set_field (state, f, field_zero (f));
+		}
+	    }
+	}
+
+	if (!is_field_set (state, f))
+	    set_field (state, f, tm_get_field (&now, f));
+    }
+
+    /* special case: rounding with week accuracy */
+    if (week_round != PARSE_TIME_NO_ROUND) {
+	/* temporarily set more accurate fields to now */
+	set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
+	set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
+	set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
+	set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
+    }
+
+    /*
+     * set all fields. they may contain out of range values before
+     * normalization by mktime(3).
+     */
+    tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
+    tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
+    tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
+    tm.tm_mday = get_field (state, TM_ABS_MDAY) -
+		 get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
+    tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
+    tm.tm_mon--; /* 1- to 0-based */
+    tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
+
+    /*
+     * It's always normal time.
+     *
+     * REVISIT: This is probably not a solution that universally
+     * works. Just make sure DST is not taken into account. We don't
+     * want rounding to be affected by DST.
+     *
+     * NOTE(review): a negative tm_isdst asks mktime(3) to work out
+     * itself whether DST is in effect, which does not obviously match
+     * "always normal time" - confirm the intent.
+     */
+    tm.tm_isdst = -1;
+
+    /* special case: rounding with week accuracy */
+    if (week_round != PARSE_TIME_NO_ROUND) {
+	/* normalize to get proper tm.wday */
+	r = normalize_tm (&tm);
+	if (r < 0)
+	    return r;
+
+	/* set more accurate fields back to zero */
+	tm.tm_sec = 0;
+	tm.tm_min = 0;
+	tm.tm_hour = 0;
+	tm.tm_isdst = -1;
+
+	/* monday is the true 1st day of week, but this is easier */
+	if (week_round <= PARSE_TIME_ROUND_DOWN)
+	    tm.tm_mday -= tm.tm_wday;
+	else
+	    tm.tm_mday += 7 - tm.tm_wday;
+    }
+
+    /* if TZ specified, convert from TZ to local time for mktime(3) */
+    if (is_field_set (state, TM_TZ)) {
+	time_t t = mktime (&tm);
+	time_t local, utc;
+
+	/* from specified TZ to UTC */
+	tm.tm_min -= get_field (state, TM_TZ);
+
+	/*
+	 * from UTC to local TZ (yes, it's hacky - FIXME)
+	 *
+	 * localtime(3) and gmtime(3) may return the same static
+	 * buffer, so finish one conversion before starting the next
+	 * instead of nesting both in a single argument list.
+	 */
+	local = mktime (localtime (&t));
+	utc = mktime (gmtime (&t));
+	tm.tm_sec += difftime (local, utc);
+    }
+
+    /*
+     * mktime(3) reports failure as (time_t) -1. Only store the result
+     * on success so that the caller's value is left intact on error.
+     * (This also rejects the one second before the epoch, which is
+     * indistinguishable from the error value.)
+     */
+    result = mktime (&tm);
+    if (result == (time_t) -1)
+	return -PARSE_TIME_ERR_LIB;
+
+    *t_out = result;
+
+    return 0;
+}
+
+/*
+ * internally, all errors are < 0. parse_time_string() returns errors > 0.
+ * The argument is parenthesized so that any expression, including one
+ * that starts with a unary minus, is negated as a whole.
+ */
+#define EXTERNAL_ERR(r) (-(r))
+
+/*
+ * Public entry point: parse 's' and store the resulting time in '*t'.
+ *
+ * Runs parse_input() followed by create_output() on a zero-initialized
+ * parser state. Negative internal error codes from either stage are
+ * converted to positive values with EXTERNAL_ERR(); 0 is returned
+ * otherwise. A NULL 's' or 't' yields PARSE_TIME_ERR.
+ */
+int
+parse_time_string (const char *s, time_t *t, const time_t *now, int round)
+{
+    struct state state = { { 0 } };
+    int err;
+
+    if (!(s && t))
+	return EXTERNAL_ERR (-PARSE_TIME_ERR);
+
+    /* only build the output if the input parsed without error */
+    err = parse_input (&state, s);
+    if (err >= 0)
+	err = create_output (&state, t, now, round);
+
+    return (err < 0) ? EXTERNAL_ERR (err) : 0;
+}
diff --git a/lib/parse-time-string.h b/lib/parse-time-string.h
new file mode 100644
index 0000000..50b7c6f
--- /dev/null
+++ b/lib/parse-time-string.h
@@ -0,0 +1,95 @@
+/*
+ * parse time string - user friendly date and time parser
+ * Copyright © 2012 Jani Nikula
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Jani Nikula <jani@nikula.org>
+ */
+
+#ifndef PARSE_TIME_STRING_H
+#define PARSE_TIME_STRING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <time.h>
+
+/*
+ * return values for parse_time_string()
+ *
+ * PARSE_TIME_OK is 0; the error codes follow it and are therefore all
+ * positive.
+ */
+enum {
+    PARSE_TIME_OK = 0,
+    PARSE_TIME_ERR,		/* unspecified error */
+    PARSE_TIME_ERR_LIB,		/* library call failed */
+    PARSE_TIME_ERR_ALREADYSET,	/* attempt to set unit twice */
+    PARSE_TIME_ERR_FORMAT,	/* generic date/time format error */
+    PARSE_TIME_ERR_DATEFORMAT,	/* date format error */
+    PARSE_TIME_ERR_TIMEFORMAT,	/* time format error */
+    PARSE_TIME_ERR_INVALIDDATE,	/* date value error */
+    PARSE_TIME_ERR_INVALIDTIME,	/* time value error */
+    PARSE_TIME_ERR_KEYWORD,	/* unknown keyword */
+};
+
+/*
+ * round values for parse_time_string()
+ *
+ * The numeric ordering DOWN < NO_ROUND < UP matters: create_output()
+ * in parse-time-string.c tests the rounding mode with <= and >=
+ * against these constants.
+ */
+enum {
+    PARSE_TIME_ROUND_DOWN = -1,	/* round towards the past */
+    PARSE_TIME_NO_ROUND = 0,	/* fill unspecified units from the reference time */
+    PARSE_TIME_ROUND_UP = 1,	/* round towards the future */
+};
+
+/**
+ * parse_time_string() - user friendly date and time parser
+ * @s:		string to parse
+ * @t:		pointer to time_t to store parsed time in
+ * @now:	pointer to time_t containing reference date/time, or NULL
+ * @round:	PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN, or
+ *		PARSE_TIME_ROUND_UP
+ *
+ * Parse a date/time string 's' and store the parsed date/time result
+ * in 't'.
+ *
+ * A reference date/time is used for determining the "date/time units"
+ * (roughly equivalent to struct tm members) not specified by 's'. If
+ * 'now' is non-NULL, it must contain a pointer to a time_t to be used
+ * as reference date/time. Otherwise, the current time is used.
+ *
+ * If 's' does not specify a full date/time, the 'round' parameter
+ * specifies if and how the result should be rounded as follows:
+ *
+ *   PARSE_TIME_NO_ROUND: All date/time units that are not specified
+ *   by 's' are set to the corresponding unit derived from the
+ *   reference date/time.
+ *
+ *   PARSE_TIME_ROUND_DOWN: All date/time units that are more accurate
+ *   than the most accurate unit specified by 's' are set to the
+ *   smallest valid value for that unit. Rest of the unspecified units
+ *   are set as in PARSE_TIME_NO_ROUND.
+ *
+ *   PARSE_TIME_ROUND_UP: All date/time units that are more accurate
+ *   than the most accurate unit specified by 's' are set to the
+ *   smallest valid value for that unit. The most accurate unit
+ *   specified by 's' is incremented by one (and this is rolled over
+ *   to the less accurate units as necessary). Rest of the unspecified
+ *   units are set as in PARSE_TIME_NO_ROUND.
+ *
+ * Return 0 (PARSE_TIME_OK) for successfully parsed date/time, or one
+ * of PARSE_TIME_ERR_* on error. 't' is not modified on error.
+ */
+int parse_time_string (const char *s, time_t *t, const time_t *now, int round);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PARSE_TIME_STRING_H */
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 2/2] lib: add date range search
  2012-02-19 22:55 [RFC PATCH 0/2] natural language date range search Jani Nikula
  2012-02-19 22:55 ` [RFC PATCH 1/2] lib: add date/time parser Jani Nikula
@ 2012-02-19 22:55 ` Jani Nikula
  2012-02-25 15:05 ` [RFC PATCH 0/2] natural language " Tomi Ollila
  2 siblings, 0 replies; 8+ messages in thread
From: Jani Nikula @ 2012-02-19 22:55 UTC (permalink / raw)
  To: notmuch

Add a custom value range processor to enable date and time searches of
the form date:since..until, where "since" and "until" are expressions
understood by parse_time_string().

If "since" or "until" describes date/time at an accuracy of days or
less, the values are rounded according to the accuracy, towards past
for "since" and towards future for "until". For example,
date:november..yesterday would match from the beginning of November
until the end of yesterday. An expression such as date:today..today
means from the beginning of today until the end of today.

Open-ended ranges are supported (since Xapian 1.2.1), i.e. you can
specify date:..until or date:since.. to not limit the start or end
date, respectively.

CAVEATS:

Xapian does not support spaces in range expressions. You can replace
the spaces with '_' or (in most cases) '-' or (in some cases) leave
the spaces out altogether.

Entering date:expr without ".." (for example date:yesterday) won't
work. You can achieve the expected result by duplicating the expr both
sides of ".." (for example date:yesterday..yesterday).

Signed-off-by: Jani Nikula <jani@nikula.org>
---
 lib/Makefile.local     |    1 +
 lib/database-private.h |    1 +
 lib/database.cc        |    4 ++++
 lib/getdate-proc.cc    |   34 ++++++++++++++++++++++++++++++++++
 lib/getdate-proc.h     |   21 +++++++++++++++++++++
 5 files changed, 61 insertions(+), 0 deletions(-)
 create mode 100644 lib/getdate-proc.cc
 create mode 100644 lib/getdate-proc.h

diff --git a/lib/Makefile.local b/lib/Makefile.local
index 803a284..7dd1f7d 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -59,6 +59,7 @@ libnotmuch_c_srcs =		\
 
 libnotmuch_cxx_srcs =		\
 	$(dir)/database.cc	\
+	$(dir)/getdate-proc.cc	\
 	$(dir)/directory.cc	\
 	$(dir)/index.cc		\
 	$(dir)/message.cc	\
diff --git a/lib/database-private.h b/lib/database-private.h
index 88532d5..ba13dc7 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -52,6 +52,7 @@ struct _notmuch_database {
     Xapian::QueryParser *query_parser;
     Xapian::TermGenerator *term_gen;
     Xapian::ValueRangeProcessor *value_range_processor;
+    Xapian::ValueRangeProcessor *getdate_proc;
 };
 
 /* Return the list of terms from the given iterator matching a prefix.
diff --git a/lib/database.cc b/lib/database.cc
index c928d02..a3f8adb 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -19,6 +19,7 @@
  */
 
 #include "database-private.h"
+#include "getdate-proc.h"
 
 #include <iostream>
 
@@ -682,12 +683,14 @@ notmuch_database_open (const char *path,
 	notmuch->term_gen = new Xapian::TermGenerator;
 	notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
 	notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP);
+	notmuch->getdate_proc = new GetDateValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP, "date:", true);
 
 	notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
 	notmuch->query_parser->set_database (*notmuch->xapian_db);
 	notmuch->query_parser->set_stemmer (Xapian::Stem ("english"));
 	notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME);
 	notmuch->query_parser->add_valuerangeprocessor (notmuch->value_range_processor);
+	notmuch->query_parser->add_valuerangeprocessor (notmuch->getdate_proc);
 
 	for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
 	    prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
@@ -729,6 +732,7 @@ notmuch_database_close (notmuch_database_t *notmuch)
     delete notmuch->query_parser;
     delete notmuch->xapian_db;
     delete notmuch->value_range_processor;
+    delete notmuch->getdate_proc;
     talloc_free (notmuch);
 }
 
diff --git a/lib/getdate-proc.cc b/lib/getdate-proc.cc
new file mode 100644
index 0000000..31f8f03
--- /dev/null
+++ b/lib/getdate-proc.cc
@@ -0,0 +1,34 @@
+
+#include "database-private.h"
+#include "getdate-proc.h"
+#include "parse-time-string.h"
+
+/*
+ * see *ValueRangeProcessor in xapian-core/api/valuerangeproc.cc
+ *
+ * Let the StringValueRangeProcessor base class vet the range first,
+ * then replace each non-empty end of the range with the serialised
+ * timestamp that parse_time_string() computes for it. 'begin' is
+ * rounded down and 'end' is rounded up. An empty end (open-ended
+ * range) is left as is.
+ *
+ * Returns the value slot number, or Xapian::BAD_VALUENO if the base
+ * class rejects the range, the current time is unavailable, or either
+ * end fails to parse.
+ */
+Xapian::valueno
+GetDateValueRangeProcessor::operator() (std::string &begin, std::string &end)
+{
+    time_t now;
+    time_t since, until;
+
+    if (Xapian::StringValueRangeProcessor::operator() (begin, end) == Xapian::BAD_VALUENO)
+	return Xapian::BAD_VALUENO;
+
+    /* both ends are resolved against one and the same 'now' */
+    if (time (&now) == (time_t) -1)
+	return Xapian::BAD_VALUENO;
+
+    if (!begin.empty ()) {
+	if (parse_time_string (begin.c_str (), &since, &now, PARSE_TIME_ROUND_DOWN) != PARSE_TIME_OK)
+	    return Xapian::BAD_VALUENO;
+
+	begin = Xapian::sortable_serialise ((double) since);
+    }
+
+    if (!end.empty ()) {
+	if (parse_time_string (end.c_str (), &until, &now, PARSE_TIME_ROUND_UP) != PARSE_TIME_OK)
+	    return Xapian::BAD_VALUENO;
+
+	end = Xapian::sortable_serialise ((double) until);
+    }
+
+    return valno;
+}
diff --git a/lib/getdate-proc.h b/lib/getdate-proc.h
new file mode 100644
index 0000000..351d06e
--- /dev/null
+++ b/lib/getdate-proc.h
@@ -0,0 +1,21 @@
+
+#ifndef NOTMUCH_GETDATE_PROC_H
+#define NOTMUCH_GETDATE_PROC_H
+
+#include <xapian.h>
+
+/*
+ * see *ValueRangeProcessor in xapian-core/include/xapian/queryparser.h
+ *
+ * Value range processor for date ranges: operator() (defined in
+ * getdate-proc.cc) runs both ends of the range through
+ * parse_time_string() and replaces them with serialised timestamps.
+ */
+class GetDateValueRangeProcessor : public Xapian::StringValueRangeProcessor {
+public:
+    /* Both constructors only forward their arguments to the base class. */
+    GetDateValueRangeProcessor (Xapian::valueno slot_)
+	: StringValueRangeProcessor (slot_) { }
+
+    GetDateValueRangeProcessor (Xapian::valueno slot_,
+				const std::string &str_,
+				bool prefix_ = true)
+	: StringValueRangeProcessor (slot_, str_, prefix_) { }
+
+    /*
+     * Rewrites 'begin' and 'end' in place; returns the value slot, or
+     * Xapian::BAD_VALUENO if the range is not handled.
+     */
+    Xapian::valueno operator() (std::string &begin, std::string &end);
+};
+
+#endif /* NOTMUCH_GETDATE_PROC_H */
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/2] natural language date range search
  2012-02-19 22:55 [RFC PATCH 0/2] natural language date range search Jani Nikula
  2012-02-19 22:55 ` [RFC PATCH 1/2] lib: add date/time parser Jani Nikula
  2012-02-19 22:55 ` [RFC PATCH 2/2] lib: add date range search Jani Nikula
@ 2012-02-25 15:05 ` Tomi Ollila
  2012-02-25 19:53   ` Jani Nikula
  2 siblings, 1 reply; 8+ messages in thread
From: Tomi Ollila @ 2012-02-25 15:05 UTC (permalink / raw)
  To: Jani Nikula, notmuch

On Mon, 20 Feb 2012 00:55:50 +0200, Jani Nikula <jani@nikula.org> wrote:
> Hi all, these patches add support for natural language date range search
> of the form date:since..until, where since and until can be fairly free
> form date/time expressions in English.
> 
> Examples:
> 
> date:two-days..yesterday (all mail in the two days before today)
> date:12h.. (all mail since 12 hrs ago)
> date:november..november (all mail in previous november)
> date:2011.. (all mail since the beginning of 2011)
> date:last-week..this-week (all mail over last and current week)
> date:5/10/2011-12:34:55..10pm_2012-01-14
> 
> Plus plenty more and combinations of the above.

Pretty c00l, the above and the code.

> 
> The repository for the date/time parser with a command line tool is at
> [1], and there's a README [2] with a bunch of details too.

By seeing the thoughts thrown in IRC there seems to be plenty of things
to resolve until something like this is going to be available in stock
notmuch. In the meanwhile I provide some ideas into the soup; maybe
our collective mind can have some use of this.


Q: Could 'date:timestr' be converted to 'date:timestr..timestr' ?

In this idea -<timestr> means relative time and <timestr> absolute
time. The time string consists of a number and a letter; assume
the above suggestion for date:timestr (<- == date:timestr..timestr)
Letters are s seconds  h hours  d days  w weeks  m months (more
useful than for minutes) and  y years.

date:-2000s     email exactly 2000 seconds ago (not very useful)
date:-2000s..   since 2000 secs in past
date:..-2000s   up to 2000 secs in past

date:3600s      email with date 01:00 -- today if (local) time is
                more than 1am. yesterday if less. If there is not
                mail with date exactly that then no email matches
                (i.e. same usefulness applies as above)
date:3600s..    email with date since 01:00 (same today/yesterday
                resolution as above)

date:12h..      since noon, either yesterday (time less than 12pm)
                or today (currently afternoon)
date:12h        like above, but since 12:00 to 13:00
date:12h..12h   ditto

date:6pm..      since 18:00 either today (current time > 6pm) or yesterday

date:-12h..     since 12 hours ago
date:-12h       since 12 hours ago to 11 hours ago

date:-1h        last hour
date:0h         today 12am - 01am 

date:-0d        today
date:-0d..      ditto
date:-1d        yesterday
date:-1d..      yesterday & today

date:-2d        2 days ago
date:-2d..      day before yesterday, yesterday & today

date:0d         last sunday (if today sunday, today (so being consistent))
date:1d         last monday (like above)
date:6d         last saturday (like above)
date:7d         last sunday (same as 0d)

date:-0w        current week (does week start sun or mon)
date:-1w        last week (same as above)
date:-2w..      since beginning of 2 weeks ago

date:1w         (ISO) week number? current week if this is the same week
                number. this week last year if current week is less.
date:1w..	since the above date resolved.

date:11m	last november (m is more useable in month than in minute)
date:11m..      since last november

date:-11m       whole month 11 months ago
date:-11m..	since beginning of 11 months ago

date:70y	year 1970
date:69y        year 2069
date:0y         year 2000
date:99y..      since beginning of 1999

date:-0y        this year
date:-1y        last year


This is incomplete, but pretty easy to use format. Letters from English
language. What is missing is a letter for month (M?) or mday (D?). All thought
as in local time...

... strings could be concatenated: for example: -3d11h (meaning second
always absolute). Need to be in granularity order (or something). Timezone
could be added (pst, utc, eest, z -- and even like -11 or +4
( -2y11m+7 meaning year 2010 november in timezone utc + 7 (this year: 2012))

Of course range date:-2d..-5h would be supported.

> BR,
> Jani.
> 
> 
> [1] https://gitorious.org/parse-time-string/parse-time-string
> [2] https://gitorious.org/parse-time-string/parse-time-string/blobs/master/README
> 

Tomi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/2] natural language date range search
  2012-02-25 15:05 ` [RFC PATCH 0/2] natural language " Tomi Ollila
@ 2012-02-25 19:53   ` Jani Nikula
  2012-02-27  7:45     ` Tomi Ollila
  0 siblings, 1 reply; 8+ messages in thread
From: Jani Nikula @ 2012-02-25 19:53 UTC (permalink / raw)
  To: Tomi Ollila, notmuch

On Sat, 25 Feb 2012 17:05:44 +0200, Tomi Ollila <tomi.ollila@iki.fi> wrote:
> On Mon, 20 Feb 2012 00:55:50 +0200, Jani Nikula <jani@nikula.org> wrote:
> > Hi all, these patches add support for natural language date range search
> > of the form date:since..until, where since and until can be fairly free
> > form date/time expressions in English.
> > 
> > Examples:
> > 
> > date:two-days..yesterday (all mail in the two days before today)
> > date:12h.. (all mail since 12 hrs ago)
> > date:november..november (all mail in previous november)
> > date:2011.. (all mail since the beginning of 2011)
> > date:last-week..this-week (all mail over last and current week)
> > date:5/10/2011-12:34:55..10pm_2012-01-14
> > 
> > Plus plenty more and combinations of the above.
> 
> Pretty c00l, the above and the code.

Thanks! :)

> > 
> > The repository for the date/time parser with a command line tool is at
> > [1], and there's a README [2] with a bunch of details too.
> 
> By seeing the thoughts thrown in IRC there seems to be plenty if things
> to resolve until something like this is going to be available in stock
> notmuch. In the meanwhile I provide some ideas into the soup; maybe
> our collective mind can have some use of this.
> 
> 
> Q: Could 'date:timestr' be converted to 'date:timestr..timestr' ?

AFAICT this would require the custom query parser.

> In this idea -<timestr> means relative time and <timestr> absolute
> time. The the time string consists of number and letter and assume
> the above suggestion for date:timestr (<- == date:timestr..timestr)
> Letters are s seconds  h hours  d days  w weeks  m months (more
> useful than for minutes) and  y years.

I'll put it bluntly: show me the code! ;)

I'll comment below how your examples can be expressed with working code
in this series, just for comparison, and to show what can be done with
this.

> date:-2000s     email exactly 2000 seconds ago (not very useful)

date:2000s..2000s

> date:-2000s..   since 2000 secs in past

date:2000s..
date:1000s500s500s.. (all accumulated)

> date:..-2000s   up to 2000 secs in past

date:..2000s

> date:3600s      email with date 01:00 -- today if (local) time is
>                 more than 1am. yesterday if less. If there is not
>                 mail with date exactly that then no email matches
>                 (i.e. same usefullness applies as above)
> date:3600s..    email with date since 01:00 (same today/yesterday
>                 resolvation like above)

Okay, there's no easy way to express it like this.

> date:12h..      since noon, either yesterday (time less than 12pm)
>                 or today (currently afternoon)

date:noon.. 
date:12pm..
date:12:00..
date:12:00:00..

(I think my implementation currently refers to today regardless of
current time; there's a REVISIT comment about that. Not difficult to
fix.)

> date:12h        like above, but since 12:00 to 13:00
> date:12h..12h   ditto

For example date:noon..13:00

> date:6pm..      since 18:00 either today (current time > 6pm) or yesterday

The same.

> date:-12h..     since 12 hours ago

date:12h..

> date:-12h       since 12 hours ago to 11 hours ago

date:12h..11hrs

> date:-1h        last hour

date:1h..

> date:0h         today 12am - 01am 

For example date:00:00..01am

> date:-0d        today

To cover the whole day, date:today..today

> date:-0d..      ditto

date:0d..0d (from zero days ago to zero days ago, rounded accordingly)

In fact, in my implementation "today" is simply the same as 0 days ago.

> date:-1d        yesterday

date:yesterday..yesterday

> date:-1d..      yesterday & today

date:1d..

> date:-2d        2 days ago

For example date:two-days..2d

> date:-2d..      day before yesterday, yesterday & today

date:2d..

> date:0d         last sunday (if today sunday, today (so being consistent))

date:sunday..sunday (if today sunday, one week ago)

> date:1d         last monday (like above)
> date:6d         last saturday (like above)
> date:7d         last sunday (same as 0d)

Just weekday names.

> date:-0w        current week (does week start sun or mon)

For example date:0w..this-week

> date:-1w        last week (same as above)

For example date:last-week..1w

> date:-2w..      since beginning of 2 weeks ago

date:2w..

> date:1w         (ISO) week number ? current week it this is same week  
>                 number. this week last year if current week is less.
> date:1w..	since the above date resolved.

N/A, my parser does not support week numbers.

> date:11m	last november (m is more useable in month than in minute)

date:november..november, or by specifying the year date:2011-11..2011-11

> date:11m..      since last november

date:november..

> date:-11m       whole month 11 months ago

date:11mon..11M (lower case m is minutes)

> date:-11m..	since beginning of 11 months ago

date:11months..

> date:70y	year 1970

date:1970..1970

> date:69y        year 2069

date:2069..2069

> date:0y         year 2000

date:2000..2000

> date:99y..      since beginning of 1999

date:1999..

(It's possible to use 2-digit years in combination with some more
context, e.g. 1.1.99)

> date:-0y        this year

date:this-year..this-year

> date:-1y        last year

date:last-year..1y

> This is incomplete, but pretty easy to use format. Letters from English
> language. What is missing letter for month (M?) ar mday (D?). All thought
> as in local time...
> 
> ... strings could be concatenated: for example: -3d11h (meaning second
> always absolute). Need to be in granularity order (or something). Timezone
> could be added (pst, utc, eest, z -- and even like -11 or +4
> ( -2y11m+7 meaning year 2010 november in timezone utc + 7 (this year: 2012))

My implementation allows adding any number of relative dates, and they
are accumulated, but only towards past. Relative date can be combined
with absolute date, as long as the same absolute unit is not specified
twice (can't say "january february", but "january one month" is one
month back from january).

(With a couple of small further patches still in my local tree) you can
specify time zone for all of the above.


BR,
Jani.



> 
> Of course range date:-2d..-5h would be supported.
> 
> > BR,
> > Jani.
> > 
> > 
> > [1] https://gitorious.org/parse-time-string/parse-time-string
> > [2] https://gitorious.org/parse-time-string/parse-time-string/blobs/master/README
> > 
> 
> Tomi
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] lib: add date/time parser
  2012-02-19 22:55 ` [RFC PATCH 1/2] lib: add date/time parser Jani Nikula
@ 2012-02-26  8:45   ` Mark Walters
  2012-02-26 20:39     ` Jani Nikula
  0 siblings, 1 reply; 8+ messages in thread
From: Mark Walters @ 2012-02-26  8:45 UTC (permalink / raw)
  To: Jani Nikula, notmuch


Hi, I have not read all of this carefully but it looks very nice to
me. It is pleasantly nice to read.

I have not looked through the create output function yet but have looked
at most of the rest.

My only concern (as mentioned on irc) is the question of
internationalisation. I think most of this can be done by allowing
other keyword tables and that seems quite clean. Ideally I think the
user would set which localisation to use in the config file and then the
cli would pass that to the lib parser.

I think it would be a shame to hold up this very useful functionality
just because of these internationalisation concerns.

The code is fairly large but it is easy to read and I would imagine
(excepting the internationalisation question) almost maintenance free.

On the actual code I have a small number of comments/queries below.

Best wishes

Mark

On Mon, 20 Feb 2012 00:55:51 +0200, Jani Nikula <jani@nikula.org> wrote:
> Signed-off-by: Jani Nikula <jani@nikula.org>
> ---
>  lib/Makefile.local      |    1 +
>  lib/parse-time-string.c | 1304 +++++++++++++++++++++++++++++++++++++++++++++++
>  lib/parse-time-string.h |   95 ++++
>  3 files changed, 1400 insertions(+), 0 deletions(-)
>  create mode 100644 lib/parse-time-string.c
>  create mode 100644 lib/parse-time-string.h
> 
> diff --git a/lib/Makefile.local b/lib/Makefile.local
> index 54c4dea..803a284 100644
> --- a/lib/Makefile.local
> +++ b/lib/Makefile.local
> @@ -53,6 +53,7 @@ libnotmuch_c_srcs =		\
>  	$(dir)/libsha1.c	\
>  	$(dir)/message-file.c	\
>  	$(dir)/messages.c	\
> +	$(dir)/parse-time-string.c	\
>  	$(dir)/sha1.c		\
>  	$(dir)/tags.c
>  
> diff --git a/lib/parse-time-string.c b/lib/parse-time-string.c
> new file mode 100644
> index 0000000..59713dc
> --- /dev/null
> +++ b/lib/parse-time-string.c
> @@ -0,0 +1,1304 @@
> +/*
> + * parse time string - user friendly date and time parser
> + * Copyright © 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Jani Nikula <jani@nikula.org>
> + */
> +
> +#ifndef PARSE_TIME_DEBUG
> +#define NDEBUG /* for assert() */
> +#endif
> +
> +#include <assert.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <stdio.h>
> +#include <stdarg.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <strings.h>
> +#include <time.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +
> +#include "parse-time-string.h"
> +
> +#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
> +
> +/* field indices in struct state tm, and set fields */
> +enum field {
> +    /* keep SEC...YEAR in this order */
> +    TM_ABS_SEC,		/* seconds */
> +    TM_ABS_MIN,		/* minutes */
> +    TM_ABS_HOUR,	/* hours */
> +    TM_ABS_MDAY,	/* day of the month */
> +    TM_ABS_MON,		/* month */
> +    TM_ABS_YEAR,	/* year */
> +
> +    TM_ABS_WDAY,	/* day of the week. special: may be relative */
> +    TM_ABS_ISDST,	/* daylight saving time */
> +
> +    TM_AMPM,		/* am vs. pm */
> +    TM_TZ,		/* timezone in minutes */
> +
> +    /* keep SEC...YEAR in this order */
> +    TM_REL_SEC,		/* seconds relative to now */
> +    TM_REL_MIN,		/* minutes ... */
> +    TM_REL_HOUR,	/* hours ... */
> +    TM_REL_DAY,		/* days ... */
> +    TM_REL_MON,		/* months ... */
> +    TM_REL_YEAR,	/* years ... */
> +    TM_REL_WEEK,	/* weeks ... */
> +
> +    TM_NONE,		/* not a field */
> +
> +    TM_SIZE = TM_NONE,
> +};
> +
> +enum field_set {
> +    FIELD_UNSET,
> +    FIELD_SET,
> +    FIELD_NOW,
> +};
> +
> +static enum field
> +next_field (enum field field)
> +{
> +    /* note: depends on the enum ordering */
> +    return field < TM_ABS_YEAR ? field + 1 : TM_NONE;
> +}
> +
> +static enum field
> +abs_to_rel_field (enum field field)
> +{
> +    assert (field <= TM_ABS_YEAR);
> +
> +    /* note: depends on the enum ordering */
> +    return field + (TM_REL_SEC - TM_ABS_SEC);
> +}
> +
> +/* get zero value for field */
> +static int
> +field_zero (enum field field)
> +{
> +    if (field == TM_ABS_MDAY || field == TM_ABS_MON)
> +	return 1;
> +    else if (field == TM_ABS_YEAR)
> +	return 1970;
> +    else
> +	return 0;
> +}
> +
> +struct state {
> +    int tm[TM_SIZE];			/* parsed date and time */
> +    enum field_set set[TM_SIZE];	/* set status of tm */
> +
> +    enum field last_field;
> +    char delim;
> +
> +    int postponed_length;	/* number of digits in postponed value */
> +    int postponed_value;
> +};

Personally I would prefer this above the function definitions (but
obviously that is up to you).

> +
> +/*
> + * Helpers for postponed numbers.
> + *
> + * postponed_length is the number of digits in postponed value. 0
> + * means there is no postponed number. -1 means there is a postponed
> + * number, but it comes from a keyword, and it doesn't have digits.
> + */
> +static int
> +get_postponed_length (struct state *state)
> +{
> +    return state->postponed_length;
> +}
> +
> +static bool
> +get_postponed_number (struct state *state, int *v, int *n)
> +{
> +    if (!state->postponed_length)
> +	return false;
> +
> +    if (n)
> +	*n = state->postponed_length;
> +
> +    if (v)
> +	*v = state->postponed_value;
> +
> +    state->postponed_length = 0;
> +    state->postponed_value = 0;
> +
> +    return true;
> +}
> +
> +/* parse postponed number if one exists */
> +static int parse_postponed_number (struct state *state, int v, int n);
> +static int
> +handle_postponed_number (struct state *state)
> +{
> +    int v = state->postponed_value;
> +    int n = state->postponed_length;
> +
> +    if (!n)
> +	return 0;
> +
> +    state->postponed_value = 0;
> +    state->postponed_length = 0;
> +
> +    return parse_postponed_number (state, v, n);
> +}
> +
> +/*
> + * set new postponed number to be handled later. if one exists
> + * already, handle it first. n may be -1 to indicate a keyword that
> + * has no number length.
> + */
> +static int
> +set_postponed_number (struct state *state, int v, int n)
> +{
> +    int r;
> +
> +    /* parse previous postponed number, if any */
> +    r = handle_postponed_number (state);
> +    if (r)
> +	return r;
> +
> +    state->postponed_length = n;
> +    state->postponed_value = v;
> +
> +    return 0;
> +}
> +
> +static void
> +set_delim (struct state *state, char delim)
> +{
> +    state->delim = delim;
> +}
> +
> +static void
> +unset_delim (struct state *state)
> +{
> +    state->delim = 0;
> +}
> +
> +/*
> + * Field set/get/mod helpers.
> + */
> +
> +/* returns unset for non-tracked fields */
> +static bool
> +is_field_set (struct state *state, enum field field)
> +{
> +    assert (field < ARRAY_SIZE (state->tm));
> +
> +    return field < ARRAY_SIZE (state->set) &&
> +	   state->set[field] != FIELD_UNSET;
> +}
> +
> +static void
> +unset_field (struct state *state, enum field field)
> +{
> +    assert (field < ARRAY_SIZE (state->tm));
> +
> +    state->set[field] = FIELD_UNSET;
> +    state->tm[field] = 0;
> +}
> +
> +/* Set field to value. */
> +static int
> +set_field (struct state *state, enum field field, int value)
> +{
> +    int r;
> +
> +    assert (field < ARRAY_SIZE (state->tm));
> +
> +    /* some fields can only be set once */
> +    if (field < ARRAY_SIZE (state->set) && state->set[field] != FIELD_UNSET)
> +	return -PARSE_TIME_ERR_ALREADYSET;
> +
> +    state->set[field] = FIELD_SET;
> +
> +    /*
> +     * REVISIT: There could be a "next_field" that would be set from
> +     * "field" for the duration of the handle_postponed_number() call,
> +     * so it has more information to work with.
> +     */
> +
> +    /* parse postponed number, if any */
> +    r = handle_postponed_number (state);
> +    if (r)
> +	return r;
> +
> +    unset_delim (state);
> +
> +    state->tm[field] = value;
> +    state->last_field = field;
> +
> +    return 0;
> +}
> +
> +/*
> + * Mark n fields in fields to be set to current date/time in the
> + * specified time zone, or local timezone if not specified. The fields
> + * will be initialized after parsing is complete and timezone is
> + * known.
> + */
> +static int
> +set_fields_to_now (struct state *state, enum field *fields, size_t n)
> +{
> +    size_t i;
> +    int r;
> +
> +    for (i = 0; i < n; i++) {
> +	r = set_field (state, fields[i], 0);
> +	if (r)
> +	    return r;
> +	state->set[fields[i]] = FIELD_NOW;
> +    }
> +
> +    return 0;
> +}
> +
> +/* Modify field by adding value to it. To be used on relative fields. */
> +static int
> +mod_field (struct state *state, enum field field, int value)
> +{
> +    int r;
> +
> +    assert (field < ARRAY_SIZE (state->tm));   /* assert relative??? */
> +
> +    if (field < ARRAY_SIZE (state->set))
> +	state->set[field] = FIELD_SET;
> +
> +    /* parse postponed number, if any */
> +    r = handle_postponed_number (state);
> +    if (r)
> +	return r;
> +
> +    unset_delim (state);
> +
> +    state->tm[field] += value;
> +    state->last_field = field;
> +
> +    return 0;
> +}
> +
> +/*
> + * Get field value. Make sure the field is set before query. It's most
> + * likely an error to call this while parsing (for example fields set
> + * as FIELD_NOW will only be set to some value after parsing).
> + */
> +static int
> +get_field (struct state *state, enum field field)
> +{
> +    assert (field < ARRAY_SIZE (state->tm));
> +
> +    return state->tm[field];
> +}
> +
> +/* Unset indicator for time and date set helpers. */
> +#define UNSET -1
> +
> +/* Time set helper. No input checking. Use UNSET (-1) to leave unset. */
> +static int
> +set_abs_time (struct state *state, int hour, int min, int sec)
> +{
> +    int r;
> +
> +    if (hour != UNSET) {
> +	if ((r = set_field (state, TM_ABS_HOUR, hour)))
> +	    return r;
> +    }
> +
> +    if (min != UNSET) {
> +	if ((r = set_field (state, TM_ABS_MIN, min)))
> +	    return r;
> +    }
> +
> +    if (sec != UNSET) {
> +	if ((r = set_field (state, TM_ABS_SEC, sec)))
> +	    return r;
> +    }
> +
> +    return 0;
> +}
> +
> +/* Date set helper. No input checking. Use UNSET (-1) to leave unset. */
> +static int
> +set_abs_date (struct state *state, int year, int mon, int mday)
> +{
> +    int r;
> +
> +    if (year != UNSET) {
> +	if ((r = set_field (state, TM_ABS_YEAR, year)))
> +	    return r;
> +    }
> +
> +    if (mon != UNSET) {
> +	if ((r = set_field (state, TM_ABS_MON, mon)))
> +	    return r;
> +    }
> +
> +    if (mday != UNSET) {
> +	if ((r = set_field (state, TM_ABS_MDAY, mday)))
> +	    return r;
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Keyword parsing and handling.
> + */
> +struct keyword;
> +typedef int (*setter_t)(struct state *state, struct keyword *kw);
> +
> +struct keyword {
> +    const char *name;	/* keyword */
> +    size_t minlen;	/* min length to match, 0 = must match all */
> +    enum field field;	/* field to set, or FIELD_NONE if N/A */
> +    int value;		/* value to set, or 0 if N/A */
> +    setter_t set;	/* function to use for setting, if non-NULL */
> +};
> +
> +/*
> + * Setter callback functions for keywords.
> + */
> +static int
> +kw_set_default (struct state *state, struct keyword *kw)
> +{
> +    return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_rel (struct state *state, struct keyword *kw)
> +{
> +    int multiplier = 1;
> +
> +    /* get a previously set multiplier, if any */
> +    get_postponed_number (state, &multiplier, NULL);
> +
> +    /* accumulate relative field values */
> +    return mod_field (state, kw->field, multiplier * kw->value);
> +}
> +
> +static int
> +kw_set_number (struct state *state, struct keyword *kw)
> +{
> +    /* -1 = no length, from keyword */
> +    return set_postponed_number (state, kw->value, -1);
> +}
> +
> +static int
> +kw_set_month (struct state *state, struct keyword *kw)
> +{
> +    int n = get_postponed_length (state);
> +
> +    /* consume postponed number if it could be mday */
> +    if (n == 1 || n == 2) {
> +	int r, v;
> +
> +	get_postponed_number (state, &v, NULL);
> +
> +	if (v < 1 || v > 31)
> +	    return -PARSE_TIME_ERR_INVALIDDATE;
> +
> +	r = set_field (state, TM_ABS_MDAY, v);
> +	if (r)
> +	    return r;
> +    }
> +
> +    return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_ampm (struct state *state, struct keyword *kw)
> +{
> +    int n = get_postponed_length (state);
> +
> +    /* consume postponed number if it could be hour */
> +    if (n == 1 || n == 2) {
> +	int r, v;
> +
> +	get_postponed_number (state, &v, NULL);
> +
> +	if (v < 1 || v > 12)
> +	    return -PARSE_TIME_ERR_INVALIDTIME;
> +
> +	r = set_abs_time (state, v, 0, 0);
> +	if (r)
> +	    return r;
> +    }
> +
> +    return set_field (state, kw->field, kw->value);
> +}
> +
> +static int
> +kw_set_timeofday (struct state *state, struct keyword *kw)
> +{
> +    return set_abs_time (state, kw->value, 0, 0);
> +}
> +
> +static int
> +kw_set_today (struct state *state, struct keyword *kw)
> +{
> +    enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
> +
> +    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> +}
> +
> +static int
> +kw_set_now (struct state *state, struct keyword *kw)
> +{
> +    enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
> +
> +    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> +}
> +
> +static int
> +kw_set_ordinal (struct state *state, struct keyword *kw)
> +{
> +    int n, v;
> +
> +    /* require a postponed number */
> +    if (!get_postponed_number (state, &v, &n))
> +	return -PARSE_TIME_ERR_DATEFORMAT;
> +
> +    /* ordinals are mday */
> +    if (n != 1 && n != 2)
> +	return -PARSE_TIME_ERR_DATEFORMAT;
> +
> +    /* be strict about st, nd, rd, and lax about th */
> +    if (strcasecmp (kw->name, "st") == 0 && v != 1 && v != 21 && v != 31)
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +    else if (strcasecmp (kw->name, "nd") == 0 && v != 2 && v != 22)
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +    else if (strcasecmp (kw->name, "rd") == 0 && v != 3 && v != 23)
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +    else if (strcasecmp (kw->name, "th") == 0 && (v < 1 || v > 31))
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +
> +    return set_field (state, TM_ABS_MDAY, v);
> +}
> +
> +/*
> + * Accepted keywords.
> + *
> + * If keyword begins with upper case letter, then the matching will be
> + * case sensitive. Otherwise the matching is case insensitive.
> + *
> + * If setter is NULL, set_default will be used.
> + *
> + * Note: Order matters. Matching is greedy, longest match is used, but
> + * of equal length matches the first one is used.
> + */
> +static struct keyword keywords[] = {
> +    /* weekdays */
> +    { "sunday",		3,	TM_ABS_WDAY,	0,	NULL },
> +    { "monday",		3,	TM_ABS_WDAY,	1,	NULL },
> +    { "tuesday",	3,	TM_ABS_WDAY,	2,	NULL },
> +    { "wednesday",	3,	TM_ABS_WDAY,	3,	NULL },
> +    { "thursday",	3,	TM_ABS_WDAY,	4,	NULL },
> +    { "friday",		3,	TM_ABS_WDAY,	5,	NULL },
> +    { "saturday",	3,	TM_ABS_WDAY,	6,	NULL },
> +
> +    /* months */
> +    { "january",	3,	TM_ABS_MON,	1,	kw_set_month },
> +    { "february",	3,	TM_ABS_MON,	2,	kw_set_month },
> +    { "march",		3,	TM_ABS_MON,	3,	kw_set_month },
> +    { "april",		3,	TM_ABS_MON,	4,	kw_set_month },
> +    { "may",		3,	TM_ABS_MON,	5,	kw_set_month },
> +    { "june",		3,	TM_ABS_MON,	6,	kw_set_month },
> +    { "july",		3,	TM_ABS_MON,	7,	kw_set_month },
> +    { "august",		3,	TM_ABS_MON,	8,	kw_set_month },
> +    { "september",	3,	TM_ABS_MON,	9,	kw_set_month },
> +    { "october",	3,	TM_ABS_MON,	10,	kw_set_month },
> +    { "november",	3,	TM_ABS_MON,	11,	kw_set_month },
> +    { "december",	3,	TM_ABS_MON,	12,	kw_set_month },
> +
> +    /* durations */
> +    { "years",		1,	TM_REL_YEAR,	1,	kw_set_rel },
> +    { "weeks",		1,	TM_REL_WEEK,	1,	kw_set_rel },
> +    { "days",		1,	TM_REL_DAY,	1,	kw_set_rel },
> +    { "hours",		1,	TM_REL_HOUR,	1,	kw_set_rel },
> +    { "hrs",		1,	TM_REL_HOUR,	1,	kw_set_rel },
> +    /* M=months, m=minutes. single M must precede minutes in the list. */
> +    { "M",		1,	TM_REL_MON,	1,	kw_set_rel },
> +    { "minutes",	1,	TM_REL_MIN,	1,	kw_set_rel },
> +    { "mins",		1,	TM_REL_MIN,	1,	kw_set_rel },
> +    { "months",		1,	TM_REL_MON,	1,	kw_set_rel },
> +    { "seconds",	1,	TM_REL_SEC,	1,	kw_set_rel },
> +    { "secs",		1,	TM_REL_SEC,	1,	kw_set_rel },
> +
> +    /* numbers */
> +    { "one",		0,	TM_NONE,	1,	kw_set_number },
> +    { "two",		0,	TM_NONE,	2,	kw_set_number },
> +    { "three",		0,	TM_NONE,	3,	kw_set_number },
> +    { "four",		0,	TM_NONE,	4,	kw_set_number },
> +    { "five",		0,	TM_NONE,	5,	kw_set_number },
> +    { "six",		0,	TM_NONE,	6,	kw_set_number },
> +    { "seven",		0,	TM_NONE,	7,	kw_set_number },
> +    { "eight",		0,	TM_NONE,	8,	kw_set_number },
> +    { "nine",		0,	TM_NONE,	9,	kw_set_number },
> +    { "ten",		0,	TM_NONE,	10,	kw_set_number },
> +    { "dozen",		0,	TM_NONE,	12,	kw_set_number },
> +    { "hundred",	0,	TM_NONE,	100,	kw_set_number },
> +
> +    /* special number forms */
> +    { "this",		0,	TM_NONE,	0,	kw_set_number },
> +    { "last",		0,	TM_NONE,	1,	kw_set_number },
> +
> +    /* specials */
> +    { "yesterday",	0,	TM_REL_DAY,	1,	kw_set_rel },
> +    { "today",		0,	TM_NONE,	0,	kw_set_today },
> +    { "now",		0,	TM_NONE,	0,	kw_set_now },
> +    { "noon",		0,	TM_NONE,	12,	kw_set_timeofday },
> +    { "midnight",	0,	TM_NONE,	0,	kw_set_timeofday },
> +    { "am",		0,	TM_AMPM,	0,	kw_set_ampm },
> +    { "a.m.",		0,	TM_AMPM,	0,	kw_set_ampm },
> +    { "pm",		0,	TM_AMPM,	1,	kw_set_ampm },
> +    { "p.m.",		0,	TM_AMPM,	1,	kw_set_ampm },
> +    { "st",		0,	TM_NONE,	0,	kw_set_ordinal },
> +    { "nd",		0,	TM_NONE,	0,	kw_set_ordinal },
> +    { "rd",		0,	TM_NONE,	0,	kw_set_ordinal },
> +    { "th",		0,	TM_NONE,	0,	kw_set_ordinal },
> +
> +    /* timezone codes: offset in minutes. FIXME: add more codes. */
> +    { "pst",		0,	TM_TZ,		-8*60,	NULL },
> +    { "mst",		0,	TM_TZ,		-7*60,	NULL },
> +    { "cst",		0,	TM_TZ,		-6*60,	NULL },
> +    { "est",		0,	TM_TZ,		-5*60,	NULL },
> +    { "ast",		0,	TM_TZ,		-4*60,	NULL },
> +    { "nst",		0,	TM_TZ,		-(3*60+30),	NULL },
> +
> +    { "gmt",		0,	TM_TZ,		0,	NULL },
> +    { "utc",		0,	TM_TZ,		0,	NULL },
> +
> +    { "wet",		0,	TM_TZ,		0,	NULL },
> +    { "cet",		0,	TM_TZ,		1*60,	NULL },
> +    { "eet",		0,	TM_TZ,		2*60,	NULL },
> +    { "fet",		0,	TM_TZ,		3*60,	NULL },
> +
> +    { "wat",		0,	TM_TZ,		1*60,	NULL },
> +    { "cat",		0,	TM_TZ,		2*60,	NULL },
> +    { "eat",		0,	TM_TZ,		3*60,	NULL },
> +};
> +
> +/*
> + * Compare strings s and keyword. Return number of matching chars on
> + * match, 0 for no match. Match must be at least n chars (n == 0 all
> + * of keyword), otherwise it's not a match. Use match_case for case
> + * sensitive matching.
> + */
> +static size_t
> +stringcmp (const char *s, const char *keyword, size_t n, bool match_case)
> +{
> +    size_t i;
> +
> +    for (i = 0; *s && *keyword; i++, s++, keyword++) {
> +	if (match_case) {
> +	    if (*s != *keyword)
> +		break;
> +	} else {
> +	    if (tolower ((unsigned char) *s) !=
> +		tolower ((unsigned char) *keyword))
> +		break;
> +	}
> +    }
> +
> +    if (n)
> +	return i < n ? 0 : i;
> +    else
> +	return *keyword ? 0 : i;
> +}
> +
> +/*
> + * Parse a keyword. Return < 0 on error, number of parsed chars on
> + * success.
> + */
> +static ssize_t
> +parse_keyword (struct state *state, const char *s)
> +{
> +    unsigned int i;
> +    size_t n, max_n = 0;
> +    struct keyword *kw = NULL;
> +    int r;
> +
> +    /* Match longest keyword */
> +    for (i = 0; i < ARRAY_SIZE (keywords); i++) {
> +	/* Match case if keyword begins with upper case letter. */
> +	bool mcase = isupper ((unsigned char) keywords[i].name[0]);
> +
> +	n = stringcmp (s, keywords[i].name, keywords[i].minlen, mcase);
> +	if (n > max_n) {
> +	    max_n = n;
> +	    kw = &keywords[i];
> +	}
> +    }
> +
> +    if (!kw)
> +	return -PARSE_TIME_ERR_KEYWORD;
> +
> +    if (kw->set)
> +	r = kw->set (state, kw);
> +    else
> +	r = kw_set_default (state, kw);
> +
> +    return r < 0 ? r : max_n;
> +}
> +
> +/*
> + * Non-keyword parsers and their helpers.
> + */
> +
> +static int
> +set_user_tz (struct state *state, char sign, int hour, int min)
> +{
> +    int tz = hour * 60 + min;
> +
> +    assert (sign == '+' || sign == '-');
> +
> +    if (hour < 0 || hour > 14 || min < 0 || min > 60 || min % 15)
> +	return -PARSE_TIME_ERR_INVALIDTIME;
> +
> +    if (sign == '-')
> +	tz = -tz;
> +
> +    return set_field (state, TM_TZ, tz);
> +}
> +
> +/*
> + * Independent parsing of a postponed number when it wasn't consumed
> + * during parsing of the following token.
> + *
> + * This should be able to trust that last_field and next_field are
> + * right.
> + */
> +static int
> +parse_postponed_number (struct state *state, int v, int n)
> +{
> +    /*
> +     * alright, these are really lone, won't affect parsing of
> +     * following items... it's not a multiplier, those have been eaten
> +     * away.
> +     *
> +     * also note numbers eaten away by parse_single_number.
> +     */
> +
> +    assert (n < 8);
> +
> +    switch (n) {
> +    case 1:
> +    case 2:
> +	/* hour or mday or year */
> +	if (state->last_field == TM_ABS_MON &&  /* FIXME: written mon! */
> +	    !is_field_set (state, TM_ABS_MDAY)) {
> +	    return set_field (state, TM_ABS_MDAY, v);
> +	}
> +	break;
> +    case 4:
> +	/* YYYY or +/-HHMM for TZ or HHMM or DDMM */
> +	/* FIXME: state->delim is no longer right for this function!
> +	 * why not, it could be! */
> +	if (!is_field_set (state, TM_ABS_YEAR)) {
> +	    /* FIXME: check year? */
> +	    return set_field (state, TM_ABS_YEAR, v);
> +	}
> +	break;
> +    case 6:
> +	/* FIXME: HHMMSS or DDMMYY */
> +	break;
> +    case -1:
> +	/* REVISIT */
> +	break;
> +    case 3:
> +    case 5:
> +    case 7:
> +    default:
> +	break;
> +    }
> +
> +    return -PARSE_TIME_ERR_FORMAT;
> +}
> +
> +/* Parse a single number. Typically postpone parsing until later. */
> +static int
> +parse_single_number (struct state *state, unsigned long v,
> +		     unsigned long n)
> +{
> +    assert (n);
> +
> +    /* parse things that can be parsed immediately */
> +    if (n == 8) {
> +	/* YYYYMMDD */
> +	int year = v / 10000;
> +	int mon = (v / 100) % 100;
> +	int mday = v % 100;
> +
> +	if (year < 1970 || mon < 1 || mon > 12 || mday < 1 || mday > 31)
> +	    return -PARSE_TIME_ERR_INVALIDDATE;

I think dates are checked for validity in more than one place. It might
be worth pulling that out into a function. In particular, someone might
want to check mday depending on month at some point.

> +
> +	return set_abs_date (state, year, mon, mday);
> +    } else if (n > 8) {
> +	/* FIXME: seconds since epoch */
> +	return -PARSE_TIME_ERR_FORMAT;
> +    }

This is probably an important FIXME for notmuch for backward compatibility.

> +
> +    if (v > INT_MAX)
> +	return -PARSE_TIME_ERR_FORMAT;
> +
> +    return set_postponed_number (state, v, n);
> +}
> +
> +static bool
> +is_time_sep (char c)
> +{
> +    return c == ':';
> +}
> +
> +static bool
> +is_date_sep (char c)
> +{
> +    return c == '/' || c == '-' || c == '.';
> +}
> +
> +static bool
> +is_sep (char c)
> +{
> +    return is_time_sep (c) || is_date_sep (c);
> +}
> +
> +/* two-digit year: 00...69 is 2000s, 70...99 1900s, if n == 0 keep unset */
> +static int
> +expand_year (unsigned long year, size_t n)
> +{
> +    if (n == 2) {
> +	return (year < 70 ? 2000 : 1900) + year;
> +    } else if (n == 4) {
> +	return year;
> +    } else {
> +	return UNSET;
> +    }
> +}
> +
> +static int
> +parse_date (struct state *state, char sep,
> +	    unsigned long v1, unsigned long v2, unsigned long v3,
> +	    size_t n1, size_t n2, size_t n3)
> +{
> +    int year = UNSET, mon = UNSET, mday = UNSET;
> +
> +    assert (is_date_sep (sep));
> +
> +    switch (sep) {
> +    case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
> +	if (n1 != 1 && n1 != 2)
> +	    return -PARSE_TIME_ERR_DATEFORMAT;
> +
> +	if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
> +	    /* M[M]/D[D][/YY[YY]] */
> +	    year = expand_year (v3, n3);
> +	    mon = v1;
> +	    mday = v2;
> +	} else if (n2 == 4 && n3 == 0) {
> +	    /* M[M]/YYYY */
> +	    year = v2;
> +	    mon = v1;
> +	} else {
> +	    return -PARSE_TIME_ERR_DATEFORMAT;
> +	}
> +	break;
> +
> +    case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
> +	if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
> +	    /* YYYY-MM[-DD] */
> +	    year = v1;
> +	    mon = v2;
> +	    if (n3)
> +		mday = v3;
> +	} else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
> +	    /* DD-MM[-YY[YY]] */
> +	    year = expand_year (v3, n3);
> +	    mon = v2;
> +	    mday = v1;
> +	} else if (n1 == 2 && n2 == 4 && n3 == 0) {
> +	    /* MM-YYYY */
> +	    year = v2;
> +	    mon = v1;
> +	} else {
> +	    return -PARSE_TIME_ERR_DATEFORMAT;
> +	}
> +	break;
> +
> +    case '.': /* Date: D[D].M[M][.[YY[YY]]] */
> +	if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
> +	    (n3 != 0 && n3 != 2 && n3 != 4))
> +	    return -PARSE_TIME_ERR_DATEFORMAT;
> +
> +	year = expand_year (v3, n3);
> +	mon = v2;
> +	mday = v1;
> +	break;
> +    }
> +
> +    if (year != UNSET && year < 1970)
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +
> +    if (mon != UNSET && (mon < 1 || mon > 12))
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +
> +    if (mday != UNSET && (mday < 1 || mday > 31))
> +	return -PARSE_TIME_ERR_INVALIDDATE;
> +
> +    return set_abs_date (state, year, mon, mday);
> +}
> +
> +static int
> +parse_time (struct state *state, char sep,
> +	    unsigned long v1, unsigned long v2, unsigned long v3,
> +	    size_t n1, size_t n2, size_t n3)
> +{
> +    assert (is_time_sep (sep));
> +
> +    if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
> +	return -PARSE_TIME_ERR_TIMEFORMAT;
> +
> +    /*
> +     * REVISIT: this means it's required to set time *before* being
> +     * able to set timezone
> +     */
> +    if (is_field_set (state, TM_ABS_HOUR) &&
> +	is_field_set (state, TM_ABS_MIN) &&
> +	n1 == 2 && n2 == 2 && n3 == 0 &&
> +	(state->delim == '+' || state->delim == '-')) {
> +	return set_user_tz (state, state->delim, v1, v2);
> +    }
> +
> +    if (v1 > 24 || v2 > 60 || v3 > 60)
> +	return -PARSE_TIME_ERR_INVALIDTIME;

Are the > rather than >= deliberate here (i.e. do you mean to allow 60
for minutes or seconds)?

> +
> +    return set_abs_time (state, v1, v2, n3 ? v3 : 0);
> +}
> +
> +/* strtoul helper that assigns length */
> +static unsigned long
> +strtoul_len (const char *s, const char **endp, size_t *len)
> +{
> +    unsigned long val = strtoul (s, (char **) endp, 10);
> +
> +    *len = *endp - s;
> +    return val;
> +}
> +
> +/*
> + * Parse a (group of) number(s). Return < 0 on error, number of parsed
> + * chars on success.
> + */
> +static ssize_t
> +parse_number (struct state *state, const char *s)
> +{
> +    int r;
> +    unsigned long v1, v2, v3 = 0;
> +    size_t n1, n2, n3 = 0;
> +    const char *p = s;
> +    char sep;
> +
> +    v1 = strtoul_len (p, &p, &n1);
> +
> +    if (is_sep (*p) && isdigit ((unsigned char) *(p + 1))) {
> +	sep = *p;
> +	v2 = strtoul_len (p + 1, &p, &n2);
> +    } else {
> +	/* a single number */
> +	r = parse_single_number (state, v1, n1);
> +	if (r)
> +	    return r;
> +
> +	return p - s;
> +    }
> +
> +    /* a group of two or three numbers? */
> +    if (*p == sep && isdigit ((unsigned char) *(p + 1)))
> +	v3 = strtoul_len (p + 1, &p, &n3);
> +
> +    if (is_time_sep (sep))
> +	r = parse_time (state, sep, v1, v2, v3, n1, n2, n3);
> +    else
> +	r = parse_date (state, sep, v1, v2, v3, n1, n2, n3);
> +
> +    if (r)
> +	return r;
> +
> +    return p - s;
> +}
> +
> +/*
> + * Parse delimiter(s). Return < 0 on error, number of parsed chars on
> + * success.
> + */
> +static ssize_t
> +parse_delim (struct state *state, const char *s)
> +{
> +    const char *p = s;
> +
> +    /*
> +     * REVISIT: any actions depending on the first delim after last
> +     * field? what could it be?
> +     */
> +
> +    /*
> +     * skip non-alpha and non-digit, and store the last for further
> +     * processing
> +     */
> +    while (*p && !isalnum ((unsigned char) *p)) {
> +	set_delim (state, *p);
> +	p++;
> +    }
> +
> +    return p - s;
> +}
> +
> +/*
> + * Parse a date/time string. Return < 0 on error, number of parsed
> + * chars on success.
> + */
> +static ssize_t
> +parse_input (struct state *state, const char *s)
> +{
> +    const char *p = s;
> +    ssize_t n;
> +    int r;
> +
> +    while (*p) {
> +	if (isalpha ((unsigned char) *p)) {
> +	    n = parse_keyword (state, p);
> +	} else if (isdigit ((unsigned char) *p)) {
> +	    n = parse_number (state, p);
> +	} else {
> +	    n = parse_delim (state, p);
> +	}
> +
> +	if (n <= 0) {
> +	    if (n == 0)
> +		n = -PARSE_TIME_ERR;
> +
> +	    return n;             /* FIXME */
> +	}
> +
> +	p += n;
> +    }
> +
> +    /* parse postponed number, if any */
> +    r = handle_postponed_number (state);
> +    if (r < 0)
> +	return r;
> +
> +    return p - s;
> +}
> +
> +/*
> + * Processing the parsed input.
> + */
> +
> +/*
> + * Initialize reference time to tm. Use time zone in state if
> + * specified, otherwise local time. Use now for reference time if
> + * non-NULL, otherwise current time.
> + */
> +static int
> +initialize_now (struct state *state, struct tm *tm, const time_t *now)
> +{
> +    time_t t;
> +
> +    if (now) {
> +	t = *now;
> +    } else {
> +	if (time (&t) == (time_t) -1)
> +	    return -PARSE_TIME_ERR_LIB;
> +    }
> +
> +    if (is_field_set (state, TM_TZ)) {
> +	/* some other time zone */
> +
> +	/* adjust now according to the TZ */
> +	t += get_field (state, TM_TZ) * 60;
> +
> +	/* it's not gm, but this doesn't mess with the tz */
> +	if (gmtime_r (&t, tm) == NULL)
> +	    return -PARSE_TIME_ERR_LIB;
> +    } else {
> +	/* local time */
> +	if (localtime_r (&t, tm) == NULL)
> +	    return -PARSE_TIME_ERR_LIB;
> +    }
> +
> +    return 0;
> +}
> +
> +/*
> + * Normalize tm according to mktime(3). Both mktime(3) and
> + * localtime_r(3) use local time, but they cancel each other out here,
> + * making this function agnostic to time zone.
> + */
> +static int
> +normalize_tm (struct tm *tm)
> +{
> +    time_t t = mktime (tm);
> +
> +    if (t == (time_t) -1)
> +	return -PARSE_TIME_ERR_LIB;
> +
> +    if (!localtime_r (&t, tm))
> +	return -PARSE_TIME_ERR_LIB;
> +
> +    return 0;
> +}
> +
> +/* Get field out of a struct tm. */
> +static int
> +tm_get_field (const struct tm *tm, enum field field)
> +{
> +    switch (field) {
> +    case TM_ABS_SEC:	return tm->tm_sec;
> +    case TM_ABS_MIN:	return tm->tm_min;
> +    case TM_ABS_HOUR:	return tm->tm_hour;
> +    case TM_ABS_MDAY:	return tm->tm_mday;
> +    case TM_ABS_MON:	return tm->tm_mon + 1; /* 0- to 1-based */
> +    case TM_ABS_YEAR:	return 1900 + tm->tm_year;
> +    case TM_ABS_WDAY:	return tm->tm_wday;
> +    case TM_ABS_ISDST:	return tm->tm_isdst;
> +    default:
> +	assert (false);
> +	break;
> +    }
> +
> +    return 0;
> +}
> +
> +/* Modify hour according to am/pm setting. */
> +static int
> +fixup_ampm (struct state *state)
> +{
> +    int hour, hdiff = 0;
> +
> +    if (!is_field_set (state, TM_AMPM))
> +	return 0;
> +
> +    if (!is_field_set (state, TM_ABS_HOUR))
> +	return -PARSE_TIME_ERR_TIMEFORMAT;
> +
> +    hour = get_field (state, TM_ABS_HOUR);
> +    if (hour < 1 || hour > 12)
> +	return -PARSE_TIME_ERR_INVALIDTIME;
> +
> +    if (get_field (state, TM_AMPM)) {
> +	/* 12pm is noon */
> +	if (hour != 12)
> +	    hdiff = 12;
> +    } else {
> +	/* 12am is midnight, beginning of day */
> +	if (hour == 12)
> +	    hdiff = -12;
> +    }
> +
> +    mod_field (state, TM_REL_HOUR, -hdiff);
> +
> +    return 0;
> +}
> +
> +/* Combine absolute and relative fields, and round. */
> +static int
> +create_output (struct state *state, time_t *t_out, const time_t *tnow,
> +	       int round)
> +{
> +    struct tm tm = { 0 };
> +    struct tm now;
> +    enum field f;
> +    int r;
> +    int week_round = PARSE_TIME_NO_ROUND;
> +
> +    r = initialize_now (state, &now, tnow);
> +    if (r)
> +	return r;
> +
> +    /* initialize uninitialized fields to now */
> +    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> +	if (state->set[f] == FIELD_NOW) {
> +	    state->tm[f] = tm_get_field (&now, f);
> +	    state->set[f] = FIELD_SET;
> +	}
> +    }
> +
> +    /*
> +     * If MON is set but YEAR is not, refer to past month.
> +     *
> +     * REVISIT: Why are month/week special in this regard? What about
> +     * mday, or time. Should refer to past.
> +     */
> +    if (is_field_set (state, TM_ABS_MON) &&
> +	!is_field_set (state, TM_ABS_YEAR)) {
> +	if (get_field (state, TM_ABS_MON) >= tm_get_field (&now, TM_ABS_MON))
> +	    mod_field (state, TM_REL_YEAR, 1);
> +    }
> +
> +    /*
> +     * If WDAY is set but MDAY is not, we consider WDAY relative
> +     *
> +     * REVISIT: This fails on stuff like "two months ago monday"
> +     * because two months ago wasn't the same day as today. Postpone
> +     * until we know date?
> +     */
> +    if (is_field_set (state, TM_ABS_WDAY) &&
> +	!is_field_set (state, TM_ABS_MDAY)) {
> +	int wday = get_field (state, TM_ABS_WDAY);
> +	int today = tm_get_field (&now, TM_ABS_WDAY);
> +	int rel_days;
> +
> +	if (today > wday)
> +	    rel_days = today - wday;
> +	else
> +	    rel_days = today + 7 - wday;
> +
> +	/* this also prevents special week rounding from happening */
> +	mod_field (state, TM_REL_DAY, rel_days);
> +
> +	unset_field (state, TM_ABS_WDAY);
> +    }
> +
> +    r = fixup_ampm (state);
> +    if (r)
> +	return r;
> +
> +    /*
> +     * Iterate fields from least accurate to most accurate, and set
> +     * unset fields according to requested rounding.
> +     */
> +    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> +	if (round != PARSE_TIME_NO_ROUND) {
> +	    enum field r = abs_to_rel_field (f);

The comment and the code seem to disagree on the ordering.

> +
> +	    if (is_field_set (state, f) || is_field_set (state, r)) {
> +		if (round >= PARSE_TIME_ROUND_UP)
> +		    mod_field (state, r, -1);
> +		round = PARSE_TIME_NO_ROUND; /* no more rounding */
> +	    } else {
> +		if (f == TM_ABS_MDAY &&
> +		    is_field_set (state, TM_REL_WEEK)) {
> +		    /* week is most accurate */
> +		    week_round = round;
> +		    round = PARSE_TIME_NO_ROUND;
> +		} else {
> +		    set_field (state, f, field_zero (f));
> +		}
> +	    }
> +	}
> +
> +	if (!is_field_set (state, f))
> +	    set_field (state, f, tm_get_field (&now, f));
> +    }
> +
> +    /* special case: rounding with week accuracy */
> +    if (week_round != PARSE_TIME_NO_ROUND) {
> +	/* temporarily set more accurate fields to now */
> +	set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
> +	set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
> +	set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
> +	set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
> +    }
> +
> +    /*
> +     * set all fields. they may contain out of range values before
> +     * normalization by mktime(3).
> +     */
> +    tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
> +    tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
> +    tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
> +    tm.tm_mday = get_field (state, TM_ABS_MDAY) -
> +		 get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
> +    tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
> +    tm.tm_mon--; /* 1- to 0-based */
> +    tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
> +
> +    /*
> +     * It's always normal time.
> +     *
> +     * REVISIT: This is probably not a solution that universally
> +     * works. Just make sure DST is not taken into account. We don't
> +     * want rounding to be affected by DST.
> +     */
> +    tm.tm_isdst = -1;
> +
> +    /* special case: rounding with week accuracy */
> +    if (week_round != PARSE_TIME_NO_ROUND) {
> +	/* normalize to get proper tm.wday */
> +	r = normalize_tm (&tm);
> +	if (r < 0)
> +	    return r;
> +
> +	/* set more accurate fields back to zero */
> +	tm.tm_sec = 0;
> +	tm.tm_min = 0;
> +	tm.tm_hour = 0;
> +	tm.tm_isdst = -1;
> +
> +	/* monday is the true 1st day of week, but this is easier */
> +	if (week_round <= PARSE_TIME_ROUND_DOWN)
> +	    tm.tm_mday -= tm.tm_wday;
> +	else
> +	    tm.tm_mday += 7 - tm.tm_wday;
> +    }
> +
> +    /* if TZ specified, convert from TZ to local time for mktime(3) */
> +    if (is_field_set (state, TM_TZ)) {
> +	time_t t = mktime (&tm);
> +
> +	/* from specified TZ to UTC */
> +	tm.tm_min -= get_field (state, TM_TZ);
> +
> +	/* from UTC to local TZ (yes, it's hacky - FIXME) */
> +	tm.tm_sec += difftime (mktime (localtime (&t)), mktime (gmtime (&t)));
> +    }
> +
> +    /* FIXME: check return value, don't set if fail */
> +    *t_out = mktime (&tm);
> +
> +    return 0;
> +}
> +
> +/* internally, all errors are < 0. parse_time_string() returns errors > 0. */
> +#define EXTERNAL_ERR(r) (-r)
> +
> +int
> +parse_time_string (const char *s, time_t *t, const time_t *now, int round)
> +{
> +    struct state state = { { 0 } };
> +    int r;
> +
> +    if (!s || !t)
> +	return EXTERNAL_ERR (-PARSE_TIME_ERR);
> +
> +    r = parse_input (&state, s);
> +    if (r < 0)
> +	return EXTERNAL_ERR (r);
> +
> +    r = create_output (&state, t, now, round);
> +    if (r < 0)
> +	return EXTERNAL_ERR (r);
> +
> +    return 0;
> +}
> diff --git a/lib/parse-time-string.h b/lib/parse-time-string.h
> new file mode 100644
> index 0000000..50b7c6f
> --- /dev/null
> +++ b/lib/parse-time-string.h
> @@ -0,0 +1,95 @@
> +/*
> + * parse time string - user friendly date and time parser
> + * Copyright © 2012 Jani Nikula
> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + *
> + * Author: Jani Nikula <jani@nikula.org>
> + */
> +
> +#ifndef PARSE_TIME_STRING_H
> +#define PARSE_TIME_STRING_H
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <time.h>
> +
> +/* return values for parse_time_string() */
> +enum {
> +    PARSE_TIME_OK = 0,
> +    PARSE_TIME_ERR,		/* unspecified error */
> +    PARSE_TIME_ERR_LIB,		/* library call failed */
> +    PARSE_TIME_ERR_ALREADYSET,	/* attempt to set unit twice */
> +    PARSE_TIME_ERR_FORMAT,	/* generic date/time format error */
> +    PARSE_TIME_ERR_DATEFORMAT,	/* date format error */
> +    PARSE_TIME_ERR_TIMEFORMAT,	/* time format error */
> +    PARSE_TIME_ERR_INVALIDDATE,	/* date value error */
> +    PARSE_TIME_ERR_INVALIDTIME,	/* time value error */
> +    PARSE_TIME_ERR_KEYWORD,	/* unknown keyword */
> +};
> +
> +/* round values for parse_time_string() */
> +enum {
> +    PARSE_TIME_ROUND_DOWN = -1,
> +    PARSE_TIME_NO_ROUND = 0,
> +    PARSE_TIME_ROUND_UP = 1,
> +};
> +
> +/**
> + * parse_time_string() - user friendly date and time parser
> + * @s:		string to parse
> + * @t:		pointer to time_t to store parsed time in
> + * @now:	pointer to time_t containing reference date/time, or NULL
> + * @round:	PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN, or
> + *		PARSE_TIME_ROUND_UP
> + *
> + * Parse a date/time string 's' and store the parsed date/time result
> + * in 't'.
> + *
> + * A reference date/time is used for determining the "date/time units"
> + * (roughly equivalent to struct tm members) not specified by 's'. If
> + * 'now' is non-NULL, it must contain a pointer to a time_t to be used
> + * as reference date/time. Otherwise, the current time is used.
> + *
> + * If 's' does not specify a full date/time, the 'round' parameter
> + * specifies if and how the result should be rounded as follows:
> + *
> + *   PARSE_TIME_NO_ROUND: All date/time units that are not specified
> + *   by 's' are set to the corresponding unit derived from the
> + *   reference date/time.
> + *
> + *   PARSE_TIME_ROUND_DOWN: All date/time units that are more accurate
> + *   than the most accurate unit specified by 's' are set to the
> + *   smallest valid value for that unit. Rest of the unspecified units
> + *   are set as in PARSE_TIME_NO_ROUND.
> + *
> + *   PARSE_TIME_ROUND_UP: All date/time units that are more accurate
> + *   than the most accurate unit specified by 's' are set to the
> + *   smallest valid value for that unit. The most accurate unit
> + *   specified by 's' is incremented by one (and this is rolled over
> + *   to the less accurate units as necessary). Rest of the unspecified
> + *   units are set as in PARSE_TIME_NO_ROUND.
> + *
> + * Return 0 (PARSE_TIME_OK) for successfully parsed date/time, or one
> + * of PARSE_TIME_ERR_* on error. 't' is not modified on error.
> + */
> +int parse_time_string (const char *s, time_t *t, const time_t *now, int round);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* PARSE_TIME_STRING_H */
> -- 
> 1.7.5.4
> 
> _______________________________________________
> notmuch mailing list
> notmuch@notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/2] lib: add date/time parser
  2012-02-26  8:45   ` Mark Walters
@ 2012-02-26 20:39     ` Jani Nikula
  0 siblings, 0 replies; 8+ messages in thread
From: Jani Nikula @ 2012-02-26 20:39 UTC (permalink / raw)
  To: Mark Walters, notmuch

On Sun, 26 Feb 2012 08:45:22 +0000, Mark Walters <markwalters1009@gmail.com> wrote:
> 
> Hi I have not read all of this carefully but it looks very nice to
> me. It is pleasantly nice to read. 

Thank you!

> I have not looked through the create output function yet but have looked
> at most of the rest.

There are a few rough edges in the output part still, but as you've
perhaps noticed it's a completely separate stage from the parsing bit.

> My only concern (as mentioned on irc) is the question of
> internationalisation. I think most of this can be done by allowing
> other keyword tables and that seems quite clean. Ideally I think the
> user would set which localisation to use in the config file and then the
> cli would pass that to the lib parser.

I think it might be possible to tweak the table and the keyword matching
in a way that makes the ordering in the table unimportant. The strings
themselves could contain information about the abbreviation points and
priority wrt same length matches, and those could be part of the
translation through gettext. And I don't think it would even bloat the
code much.

Assuming gettext would be the internationalization method of choice.

> I think it would be a shame to hold up this very useful functionality
> just because of these internationalisation concerns.

I agree but I'm heavily biased! ;)

> The code is fairly large but it is easy to read and I would imagine
> (excepting the internationalisation question) almost maintenance free.
> 
> On the actual code I have a small number of comments/queries below.

Replies to them inline, and fixes pushed to [1] along with some other
fixes and improvements.

BR,
Jani.


[1] http://gitorious.org/parse-time-string

> 
> Best wishes
> 
> Mark
> 
> On Mon, 20 Feb 2012 00:55:51 +0200, Jani Nikula <jani@nikula.org> wrote:
> > Signed-off-by: Jani Nikula <jani@nikula.org>
> > ---
> >  lib/Makefile.local      |    1 +
> >  lib/parse-time-string.c | 1304 +++++++++++++++++++++++++++++++++++++++++++++++
> >  lib/parse-time-string.h |   95 ++++
> >  3 files changed, 1400 insertions(+), 0 deletions(-)
> >  create mode 100644 lib/parse-time-string.c
> >  create mode 100644 lib/parse-time-string.h
> > 
> > diff --git a/lib/Makefile.local b/lib/Makefile.local
> > index 54c4dea..803a284 100644
> > --- a/lib/Makefile.local
> > +++ b/lib/Makefile.local
> > @@ -53,6 +53,7 @@ libnotmuch_c_srcs =		\
> >  	$(dir)/libsha1.c	\
> >  	$(dir)/message-file.c	\
> >  	$(dir)/messages.c	\
> > +	$(dir)/parse-time-string.c	\
> >  	$(dir)/sha1.c		\
> >  	$(dir)/tags.c
> >  
> > diff --git a/lib/parse-time-string.c b/lib/parse-time-string.c
> > new file mode 100644
> > index 0000000..59713dc
> > --- /dev/null
> > +++ b/lib/parse-time-string.c
> > @@ -0,0 +1,1304 @@
> > +/*
> > + * parse time string - user friendly date and time parser
> > + * Copyright © 2012 Jani Nikula
> > + *
> > + * This program is free software: you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation, either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > + *
> > + * Author: Jani Nikula <jani@nikula.org>
> > + */
> > +
> > +#ifndef PARSE_TIME_DEBUG
> > +#define NDEBUG /* for assert() */
> > +#endif
> > +
> > +#include <assert.h>
> > +#include <ctype.h>
> > +#include <errno.h>
> > +#include <limits.h>
> > +#include <stdio.h>
> > +#include <stdarg.h>
> > +#include <stdbool.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <strings.h>
> > +#include <time.h>
> > +#include <sys/time.h>
> > +#include <sys/types.h>
> > +
> > +#include "parse-time-string.h"
> > +
> > +#define ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
> > +
> > +/* field indices in struct state tm, and set fields */
> > +enum field {
> > +    /* keep SEC...YEAR in this order */
> > +    TM_ABS_SEC,		/* seconds */
> > +    TM_ABS_MIN,		/* minutes */
> > +    TM_ABS_HOUR,	/* hours */
> > +    TM_ABS_MDAY,	/* day of the month */
> > +    TM_ABS_MON,		/* month */
> > +    TM_ABS_YEAR,	/* year */
> > +
> > +    TM_ABS_WDAY,	/* day of the week. special: may be relative */
> > +    TM_ABS_ISDST,	/* daylight saving time */
> > +
> > +    TM_AMPM,		/* am vs. pm */
> > +    TM_TZ,		/* timezone in minutes */
> > +
> > +    /* keep SEC...YEAR in this order */
> > +    TM_REL_SEC,		/* seconds relative to now */
> > +    TM_REL_MIN,		/* minutes ... */
> > +    TM_REL_HOUR,	/* hours ... */
> > +    TM_REL_DAY,		/* days ... */
> > +    TM_REL_MON,		/* months ... */
> > +    TM_REL_YEAR,	/* years ... */
> > +    TM_REL_WEEK,	/* weeks ... */
> > +
> > +    TM_NONE,		/* not a field */
> > +
> > +    TM_SIZE = TM_NONE,
> > +};
> > +
> > +enum field_set {
> > +    FIELD_UNSET,
> > +    FIELD_SET,
> > +    FIELD_NOW,
> > +};
> > +
> > +static enum field
> > +next_field (enum field field)
> > +{
> > +    /* note: depends on the enum ordering */
> > +    return field < TM_ABS_YEAR ? field + 1 : TM_NONE;
> > +}
> > +
> > +static enum field
> > +abs_to_rel_field (enum field field)
> > +{
> > +    assert (field <= TM_ABS_YEAR);
> > +
> > +    /* note: depends on the enum ordering */
> > +    return field + (TM_REL_SEC - TM_ABS_SEC);
> > +}
> > +
> > +/* get zero value for field */
> > +static int
> > +field_zero (enum field field)
> > +{
> > +    if (field == TM_ABS_MDAY || field == TM_ABS_MON)
> > +	return 1;
> > +    else if (field == TM_ABS_YEAR)
> > +	return 1970;
> > +    else
> > +	return 0;
> > +}
> > +
> > +struct state {
> > +    int tm[TM_SIZE];			/* parsed date and time */
> > +    enum field_set set[TM_SIZE];	/* set status of tm */
> > +
> > +    enum field last_field;
> > +    char delim;
> > +
> > +    int postponed_length;	/* number of digits in postponed value */
> > +    int postponed_value;
> > +};
> 
> Personally I would prefer this above the function definitions (but
> obviously that is up to you).

I just wanted to keep the couple of enum field related functions near
the enum field definition because they have a dependency on the enum
ordering.

> 
> > +
> > +/*
> > + * Helpers for postponed numbers.
> > + *
> > + * postponed_length is the number of digits in postponed value. 0
> > + * means there is no postponed number. -1 means there is a postponed
> > + * number, but it comes from a keyword, and it doesn't have digits.
> > + */
> > +static int
> > +get_postponed_length (struct state *state)
> > +{
> > +    return state->postponed_length;
> > +}
> > +
> > +static bool
> > +get_postponed_number (struct state *state, int *v, int *n)
> > +{
> > +    if (!state->postponed_length)
> > +	return false;
> > +
> > +    if (n)
> > +	*n = state->postponed_length;
> > +
> > +    if (v)
> > +	*v = state->postponed_value;
> > +
> > +    state->postponed_length = 0;
> > +    state->postponed_value = 0;
> > +
> > +    return true;
> > +}
> > +
> > +/* parse postponed number if one exists */
> > +static int parse_postponed_number (struct state *state, int v, int n);
> > +static int
> > +handle_postponed_number (struct state *state)
> > +{
> > +    int v = state->postponed_value;
> > +    int n = state->postponed_length;
> > +
> > +    if (!n)
> > +	return 0;
> > +
> > +    state->postponed_value = 0;
> > +    state->postponed_length = 0;
> > +
> > +    return parse_postponed_number (state, v, n);
> > +}
> > +
> > +/*
> > + * set new postponed number to be handled later. if one exists
> > + * already, handle it first. n may be -1 to indicate a keyword that
> > + * has no number length.
> > + */
> > +static int
> > +set_postponed_number (struct state *state, int v, int n)
> > +{
> > +    int r;
> > +
> > +    /* parse previous postponed number, if any */
> > +    r = handle_postponed_number (state);
> > +    if (r)
> > +	return r;
> > +
> > +    state->postponed_length = n;
> > +    state->postponed_value = v;
> > +
> > +    return 0;
> > +}
> > +
> > +static void
> > +set_delim (struct state *state, char delim)
> > +{
> > +    state->delim = delim;
> > +}
> > +
> > +static void
> > +unset_delim (struct state *state)
> > +{
> > +    state->delim = 0;
> > +}
> > +
> > +/*
> > + * Field set/get/mod helpers.
> > + */
> > +
> > +/* returns unset for non-tracked fields */
> > +static bool
> > +is_field_set (struct state *state, enum field field)
> > +{
> > +    assert (field < ARRAY_SIZE (state->tm));
> > +
> > +    return field < ARRAY_SIZE (state->set) &&
> > +	   state->set[field] != FIELD_UNSET;
> > +}
> > +
> > +static void
> > +unset_field (struct state *state, enum field field)
> > +{
> > +    assert (field < ARRAY_SIZE (state->tm));
> > +
> > +    state->set[field] = FIELD_UNSET;
> > +    state->tm[field] = 0;
> > +}
> > +
> > +/* Set field to value. */
> > +static int
> > +set_field (struct state *state, enum field field, int value)
> > +{
> > +    int r;
> > +
> > +    assert (field < ARRAY_SIZE (state->tm));
> > +
> > +    /* some fields can only be set once */
> > +    if (field < ARRAY_SIZE (state->set) && state->set[field] != FIELD_UNSET)
> > +	return -PARSE_TIME_ERR_ALREADYSET;
> > +
> > +    state->set[field] = FIELD_SET;
> > +
> > +    /*
> > +     * REVISIT: There could be a "next_field" that would be set from
> > +     * "field" for the duration of the handle_postponed_number() call,
> > +     * so it has more information to work with.
> > +     */
> > +
> > +    /* parse postponed number, if any */
> > +    r = handle_postponed_number (state);
> > +    if (r)
> > +	return r;
> > +
> > +    unset_delim (state);
> > +
> > +    state->tm[field] = value;
> > +    state->last_field = field;
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Mark n fields in fields to be set to current date/time in the
> > + * specified time zone, or local timezone if not specified. The fields
> > + * will be initialized after parsing is complete and timezone is
> > + * known.
> > + */
> > +static int
> > +set_fields_to_now (struct state *state, enum field *fields, size_t n)
> > +{
> > +    size_t i;
> > +    int r;
> > +
> > +    for (i = 0; i < n; i++) {
> > +	r = set_field (state, fields[i], 0);
> > +	if (r)
> > +	    return r;
> > +	state->set[fields[i]] = FIELD_NOW;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/* Modify field by adding value to it. To be used on relative fields. */
> > +static int
> > +mod_field (struct state *state, enum field field, int value)
> > +{
> > +    int r;
> > +
> > +    assert (field < ARRAY_SIZE (state->tm));   /* assert relative??? */
> > +
> > +    if (field < ARRAY_SIZE (state->set))
> > +	state->set[field] = FIELD_SET;
> > +
> > +    /* parse postponed number, if any */
> > +    r = handle_postponed_number (state);
> > +    if (r)
> > +	return r;
> > +
> > +    unset_delim (state);
> > +
> > +    state->tm[field] += value;
> > +    state->last_field = field;
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Get field value. Make sure the field is set before query. It's most
> > + * likely an error to call this while parsing (for example fields set
> > + * as FIELD_NOW will only be set to some value after parsing).
> > + */
> > +static int
> > +get_field (struct state *state, enum field field)
> > +{
> > +    assert (field < ARRAY_SIZE (state->tm));
> > +
> > +    return state->tm[field];
> > +}
> > +
> > +/* Unset indicator for time and date set helpers. */
> > +#define UNSET -1
> > +
> > +/* Time set helper. No input checking. Use UNSET (-1) to leave unset. */
> > +static int
> > +set_abs_time (struct state *state, int hour, int min, int sec)
> > +{
> > +    int r;
> > +
> > +    if (hour != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_HOUR, hour)))
> > +	    return r;
> > +    }
> > +
> > +    if (min != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_MIN, min)))
> > +	    return r;
> > +    }
> > +
> > +    if (sec != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_SEC, sec)))
> > +	    return r;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/* Date set helper. No input checking. Use UNSET (-1) to leave unset. */
> > +static int
> > +set_abs_date (struct state *state, int year, int mon, int mday)
> > +{
> > +    int r;
> > +
> > +    if (year != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_YEAR, year)))
> > +	    return r;
> > +    }
> > +
> > +    if (mon != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_MON, mon)))
> > +	    return r;
> > +    }
> > +
> > +    if (mday != UNSET) {
> > +	if ((r = set_field (state, TM_ABS_MDAY, mday)))
> > +	    return r;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Keyword parsing and handling.
> > + */
> > +struct keyword;
> > +typedef int (*setter_t)(struct state *state, struct keyword *kw);
> > +
> > +struct keyword {
> > +    const char *name;	/* keyword */
> > +    size_t minlen;	/* min length to match, 0 = must match all */
> > +    enum field field;	/* field to set, or TM_NONE if N/A */
> > +    int value;		/* value to set, or 0 if N/A */
> > +    setter_t set;	/* function to use for setting, if non-NULL */
> > +};
> > +
> > +/*
> > + * Setter callback functions for keywords.
> > + */
> > +static int
> > +kw_set_default (struct state *state, struct keyword *kw)
> > +{
> > +    return set_field (state, kw->field, kw->value);
> > +}
> > +
> > +static int
> > +kw_set_rel (struct state *state, struct keyword *kw)
> > +{
> > +    int multiplier = 1;
> > +
> > +    /* get a previously set multiplier, if any */
> > +    get_postponed_number (state, &multiplier, NULL);
> > +
> > +    /* accumulate relative field values */
> > +    return mod_field (state, kw->field, multiplier * kw->value);
> > +}
> > +
> > +static int
> > +kw_set_number (struct state *state, struct keyword *kw)
> > +{
> > +    /* -1 = no length, from keyword */
> > +    return set_postponed_number (state, kw->value, -1);
> > +}
> > +
> > +static int
> > +kw_set_month (struct state *state, struct keyword *kw)
> > +{
> > +    int n = get_postponed_length (state);
> > +
> > +    /* consume postponed number if it could be mday */
> > +    if (n == 1 || n == 2) {
> > +	int r, v;
> > +
> > +	get_postponed_number (state, &v, NULL);
> > +
> > +	if (v < 1 || v > 31)
> > +	    return -PARSE_TIME_ERR_INVALIDDATE;
> > +
> > +	r = set_field (state, TM_ABS_MDAY, v);
> > +	if (r)
> > +	    return r;
> > +    }
> > +
> > +    return set_field (state, kw->field, kw->value);
> > +}
> > +
> > +static int
> > +kw_set_ampm (struct state *state, struct keyword *kw)
> > +{
> > +    int n = get_postponed_length (state);
> > +
> > +    /* consume postponed number if it could be hour */
> > +    if (n == 1 || n == 2) {
> > +	int r, v;
> > +
> > +	get_postponed_number (state, &v, NULL);
> > +
> > +	if (v < 1 || v > 12)
> > +	    return -PARSE_TIME_ERR_INVALIDTIME;
> > +
> > +	r = set_abs_time (state, v, 0, 0);
> > +	if (r)
> > +	    return r;
> > +    }
> > +
> > +    return set_field (state, kw->field, kw->value);
> > +}
> > +
> > +static int
> > +kw_set_timeofday (struct state *state, struct keyword *kw)
> > +{
> > +    return set_abs_time (state, kw->value, 0, 0);
> > +}
> > +
> > +static int
> > +kw_set_today (struct state *state, struct keyword *kw)
> > +{
> > +    enum field fields[] = { TM_ABS_YEAR, TM_ABS_MON, TM_ABS_MDAY };
> > +
> > +    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> > +}
> > +
> > +static int
> > +kw_set_now (struct state *state, struct keyword *kw)
> > +{
> > +    enum field fields[] = { TM_ABS_HOUR, TM_ABS_MIN, TM_ABS_SEC };
> > +
> > +    return set_fields_to_now (state, fields, ARRAY_SIZE (fields));
> > +}
> > +
> > +static int
> > +kw_set_ordinal (struct state *state, struct keyword *kw)
> > +{
> > +    int n, v;
> > +
> > +    /* require a postponed number */
> > +    if (!get_postponed_number (state, &v, &n))
> > +	return -PARSE_TIME_ERR_DATEFORMAT;
> > +
> > +    /* ordinals are mday */
> > +    if (n != 1 && n != 2)
> > +	return -PARSE_TIME_ERR_DATEFORMAT;
> > +
> > +    /* be strict about st, nd, rd, and lax about th */
> > +    if (strcasecmp (kw->name, "st") == 0 && v != 1 && v != 21 && v != 31)
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +    else if (strcasecmp (kw->name, "nd") == 0 && v != 2 && v != 22)
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +    else if (strcasecmp (kw->name, "rd") == 0 && v != 3 && v != 23)
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +    else if (strcasecmp (kw->name, "th") == 0 && (v < 1 || v > 31))
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +
> > +    return set_field (state, TM_ABS_MDAY, v);
> > +}
> > +
> > +/*
> > + * Accepted keywords.
> > + *
> > + * If keyword begins with upper case letter, then the matching will be
> > + * case sensitive. Otherwise the matching is case insensitive.
> > + *
> > + * If setter is NULL, set_default will be used.
> > + *
> > + * Note: Order matters. Matching is greedy, longest match is used, but
> > + * among equal-length matches the first one is used.
> > + */
> > +static struct keyword keywords[] = {
> > +    /* weekdays */
> > +    { "sunday",		3,	TM_ABS_WDAY,	0,	NULL },
> > +    { "monday",		3,	TM_ABS_WDAY,	1,	NULL },
> > +    { "tuesday",	3,	TM_ABS_WDAY,	2,	NULL },
> > +    { "wednesday",	3,	TM_ABS_WDAY,	3,	NULL },
> > +    { "thursday",	3,	TM_ABS_WDAY,	4,	NULL },
> > +    { "friday",		3,	TM_ABS_WDAY,	5,	NULL },
> > +    { "saturday",	3,	TM_ABS_WDAY,	6,	NULL },
> > +
> > +    /* months */
> > +    { "january",	3,	TM_ABS_MON,	1,	kw_set_month },
> > +    { "february",	3,	TM_ABS_MON,	2,	kw_set_month },
> > +    { "march",		3,	TM_ABS_MON,	3,	kw_set_month },
> > +    { "april",		3,	TM_ABS_MON,	4,	kw_set_month },
> > +    { "may",		3,	TM_ABS_MON,	5,	kw_set_month },
> > +    { "june",		3,	TM_ABS_MON,	6,	kw_set_month },
> > +    { "july",		3,	TM_ABS_MON,	7,	kw_set_month },
> > +    { "august",		3,	TM_ABS_MON,	8,	kw_set_month },
> > +    { "september",	3,	TM_ABS_MON,	9,	kw_set_month },
> > +    { "october",	3,	TM_ABS_MON,	10,	kw_set_month },
> > +    { "november",	3,	TM_ABS_MON,	11,	kw_set_month },
> > +    { "december",	3,	TM_ABS_MON,	12,	kw_set_month },
> > +
> > +    /* durations */
> > +    { "years",		1,	TM_REL_YEAR,	1,	kw_set_rel },
> > +    { "weeks",		1,	TM_REL_WEEK,	1,	kw_set_rel },
> > +    { "days",		1,	TM_REL_DAY,	1,	kw_set_rel },
> > +    { "hours",		1,	TM_REL_HOUR,	1,	kw_set_rel },
> > +    { "hrs",		1,	TM_REL_HOUR,	1,	kw_set_rel },
> > +    /* M=months, m=minutes. single M must precede minutes in the list. */
> > +    { "M",		1,	TM_REL_MON,	1,	kw_set_rel },
> > +    { "minutes",	1,	TM_REL_MIN,	1,	kw_set_rel },
> > +    { "mins",		1,	TM_REL_MIN,	1,	kw_set_rel },
> > +    { "months",		1,	TM_REL_MON,	1,	kw_set_rel },
> > +    { "seconds",	1,	TM_REL_SEC,	1,	kw_set_rel },
> > +    { "secs",		1,	TM_REL_SEC,	1,	kw_set_rel },
> > +
> > +    /* numbers */
> > +    { "one",		0,	TM_NONE,	1,	kw_set_number },
> > +    { "two",		0,	TM_NONE,	2,	kw_set_number },
> > +    { "three",		0,	TM_NONE,	3,	kw_set_number },
> > +    { "four",		0,	TM_NONE,	4,	kw_set_number },
> > +    { "five",		0,	TM_NONE,	5,	kw_set_number },
> > +    { "six",		0,	TM_NONE,	6,	kw_set_number },
> > +    { "seven",		0,	TM_NONE,	7,	kw_set_number },
> > +    { "eight",		0,	TM_NONE,	8,	kw_set_number },
> > +    { "nine",		0,	TM_NONE,	9,	kw_set_number },
> > +    { "ten",		0,	TM_NONE,	10,	kw_set_number },
> > +    { "dozen",		0,	TM_NONE,	12,	kw_set_number },
> > +    { "hundred",	0,	TM_NONE,	100,	kw_set_number },
> > +
> > +    /* special number forms */
> > +    { "this",		0,	TM_NONE,	0,	kw_set_number },
> > +    { "last",		0,	TM_NONE,	1,	kw_set_number },
> > +
> > +    /* specials */
> > +    { "yesterday",	0,	TM_REL_DAY,	1,	kw_set_rel },
> > +    { "today",		0,	TM_NONE,	0,	kw_set_today },
> > +    { "now",		0,	TM_NONE,	0,	kw_set_now },
> > +    { "noon",		0,	TM_NONE,	12,	kw_set_timeofday },
> > +    { "midnight",	0,	TM_NONE,	0,	kw_set_timeofday },
> > +    { "am",		0,	TM_AMPM,	0,	kw_set_ampm },
> > +    { "a.m.",		0,	TM_AMPM,	0,	kw_set_ampm },
> > +    { "pm",		0,	TM_AMPM,	1,	kw_set_ampm },
> > +    { "p.m.",		0,	TM_AMPM,	1,	kw_set_ampm },
> > +    { "st",		0,	TM_NONE,	0,	kw_set_ordinal },
> > +    { "nd",		0,	TM_NONE,	0,	kw_set_ordinal },
> > +    { "rd",		0,	TM_NONE,	0,	kw_set_ordinal },
> > +    { "th",		0,	TM_NONE,	0,	kw_set_ordinal },
> > +
> > +    /* timezone codes: offset in minutes. FIXME: add more codes. */
> > +    { "pst",		0,	TM_TZ,		-8*60,	NULL },
> > +    { "mst",		0,	TM_TZ,		-7*60,	NULL },
> > +    { "cst",		0,	TM_TZ,		-6*60,	NULL },
> > +    { "est",		0,	TM_TZ,		-5*60,	NULL },
> > +    { "ast",		0,	TM_TZ,		-4*60,	NULL },
> > +    { "nst",		0,	TM_TZ,		-(3*60+30),	NULL },
> > +
> > +    { "gmt",		0,	TM_TZ,		0,	NULL },
> > +    { "utc",		0,	TM_TZ,		0,	NULL },
> > +
> > +    { "wet",		0,	TM_TZ,		0,	NULL },
> > +    { "cet",		0,	TM_TZ,		1*60,	NULL },
> > +    { "eet",		0,	TM_TZ,		2*60,	NULL },
> > +    { "fet",		0,	TM_TZ,		3*60,	NULL },
> > +
> > +    { "wat",		0,	TM_TZ,		1*60,	NULL },
> > +    { "cat",		0,	TM_TZ,		2*60,	NULL },
> > +    { "eat",		0,	TM_TZ,		3*60,	NULL },
> > +};
> > +
> > +/*
> > + * Compare strings s and keyword. Return number of matching chars on
> > + * match, 0 for no match. Match must be at least n chars (n == 0 all
> > + * of keyword), otherwise it's not a match. Use match_case for case
> > + * sensitive matching.
> > + */
> > +static size_t
> > +stringcmp (const char *s, const char *keyword, size_t n, bool match_case)
> > +{
> > +    size_t i;
> > +
> > +    for (i = 0; *s && *keyword; i++, s++, keyword++) {
> > +	if (match_case) {
> > +	    if (*s != *keyword)
> > +		break;
> > +	} else {
> > +	    if (tolower ((unsigned char) *s) !=
> > +		tolower ((unsigned char) *keyword))
> > +		break;
> > +	}
> > +    }
> > +
> > +    if (n)
> > +	return i < n ? 0 : i;
> > +    else
> > +	return *keyword ? 0 : i;
> > +}
> > +
> > +/*
> > + * Parse a keyword. Return < 0 on error, number of parsed chars on
> > + * success.
> > + */
> > +static ssize_t
> > +parse_keyword (struct state *state, const char *s)
> > +{
> > +    unsigned int i;
> > +    size_t n, max_n = 0;
> > +    struct keyword *kw = NULL;
> > +    int r;
> > +
> > +    /* Match longest keyword */
> > +    for (i = 0; i < ARRAY_SIZE (keywords); i++) {
> > +	/* Match case if keyword begins with upper case letter. */
> > +	bool mcase = isupper ((unsigned char) keywords[i].name[0]);
> > +
> > +	n = stringcmp (s, keywords[i].name, keywords[i].minlen, mcase);
> > +	if (n > max_n) {
> > +	    max_n = n;
> > +	    kw = &keywords[i];
> > +	}
> > +    }
> > +
> > +    if (!kw)
> > +	return -PARSE_TIME_ERR_KEYWORD;
> > +
> > +    if (kw->set)
> > +	r = kw->set (state, kw);
> > +    else
> > +	r = kw_set_default (state, kw);
> > +
> > +    return r < 0 ? r : max_n;
> > +}
> > +
> > +/*
> > + * Non-keyword parsers and their helpers.
> > + */
> > +
> > +static int
> > +set_user_tz (struct state *state, char sign, int hour, int min)
> > +{
> > +    int tz = hour * 60 + min;
> > +
> > +    assert (sign == '+' || sign == '-');
> > +
> > +    if (hour < 0 || hour > 14 || min < 0 || min > 60 || min % 15)
> > +	return -PARSE_TIME_ERR_INVALIDTIME;
> > +
> > +    if (sign == '-')
> > +	tz = -tz;
> > +
> > +    return set_field (state, TM_TZ, tz);
> > +}
> > +
> > +/*
> > + * Independent parsing of a postponed number when it wasn't consumed
> > + * during parsing of the following token.
> > + *
> > + * This should be able to trust that last_field and next_field are
> > + * right.
> > + */
> > +static int
> > +parse_postponed_number (struct state *state, int v, int n)
> > +{
> > +    /*
> > +     * alright, these are really lone, won't affect parsing of
> > +     * following items... it's not a multiplier, those have been eaten
> > +     * away.
> > +     *
> > +     * also note numbers eaten away by parse_single_number.
> > +     */
> > +
> > +    assert (n < 8);
> > +
> > +    switch (n) {
> > +    case 1:
> > +    case 2:
> > +	/* hour or mday or year */
> > +	if (state->last_field == TM_ABS_MON &&  /* FIXME: written mon! */
> > +	    !is_field_set (state, TM_ABS_MDAY)) {
> > +	    return set_field (state, TM_ABS_MDAY, v);
> > +	}
> > +	break;
> > +    case 4:
> > +	/* YYYY or +/-HHMM for TZ or HHMM or DDMM */
> > +	/* FIXME: state->delim is no longer right for this function!
> > +	 * why not, it could be! */
> > +	if (!is_field_set (state, TM_ABS_YEAR)) {
> > +	    /* FIXME: check year? */
> > +	    return set_field (state, TM_ABS_YEAR, v);
> > +	}
> > +	break;
> > +    case 6:
> > +	/* FIXME: HHMMSS or DDMMYY */
> > +	break;
> > +    case -1:
> > +	/* REVISIT */
> > +	break;
> > +    case 3:
> > +    case 5:
> > +    case 7:
> > +    default:
> > +	break;
> > +    }
> > +
> > +    return -PARSE_TIME_ERR_FORMAT;
> > +}
> > +
> > +/* Parse a single number. Typically postpone parsing until later. */
> > +static int
> > +parse_single_number (struct state *state, unsigned long v,
> > +		     unsigned long n)
> > +{
> > +    assert (n);
> > +
> > +    /* parse things that can be parsed immediately */
> > +    if (n == 8) {
> > +	/* YYYYMMDD */
> > +	int year = v / 10000;
> > +	int mon = (v / 100) % 100;
> > +	int mday = v % 100;
> > +
> > +	if (year < 1970 || mon < 1 || mon > 12 || mday < 1 || mday > 31)
> > +	    return -PARSE_TIME_ERR_INVALIDDATE;
> 
> I think dates are checked for validity in more than one place. It might
> be worth pulling that out into a function. In particular, someone might
> want to check mday depending on month at some point.

Agreed.

> 
> > +
> > +	return set_abs_date (state, year, mon, mday);
> > +    } else if (n > 8) {
> > +	/* FIXME: seconds since epoch */
> > +	return -PARSE_TIME_ERR_FORMAT;
> > +    }
> 
> This is probably an important FIXME for notmuch for backward compatibility.

Nice to have, but not hugely important; this does not affect the
existing <timestamp>..<timestamp> (without "date:" prefix!) search.

> 
> > +
> > +    if (v > INT_MAX)
> > +	return -PARSE_TIME_ERR_FORMAT;
> > +
> > +    return set_postponed_number (state, v, n);
> > +}
> > +
> > +static bool
> > +is_time_sep (char c)
> > +{
> > +    return c == ':';
> > +}
> > +
> > +static bool
> > +is_date_sep (char c)
> > +{
> > +    return c == '/' || c == '-' || c == '.';
> > +}
> > +
> > +static bool
> > +is_sep (char c)
> > +{
> > +    return is_time_sep (c) || is_date_sep (c);
> > +}
> > +
> > +/* two-digit year: 00...69 is 2000s, 70...99 1900s, if n == 0 keep unset */
> > +static int
> > +expand_year (unsigned long year, size_t n)
> > +{
> > +    if (n == 2) {
> > +	return (year < 70 ? 2000 : 1900) + year;
> > +    } else if (n == 4) {
> > +	return year;
> > +    } else {
> > +	return UNSET;
> > +    }
> > +}
> > +
> > +static int
> > +parse_date (struct state *state, char sep,
> > +	    unsigned long v1, unsigned long v2, unsigned long v3,
> > +	    size_t n1, size_t n2, size_t n3)
> > +{
> > +    int year = UNSET, mon = UNSET, mday = UNSET;
> > +
> > +    assert (is_date_sep (sep));
> > +
> > +    switch (sep) {
> > +    case '/': /* Date: M[M]/D[D][/YY[YY]] or M[M]/YYYY */
> > +	if (n1 != 1 && n1 != 2)
> > +	    return -PARSE_TIME_ERR_DATEFORMAT;
> > +
> > +	if ((n2 == 1 || n2 == 2) && (n3 == 0 || n3 == 2 || n3 == 4)) {
> > +	    /* M[M]/D[D][/YY[YY]] */
> > +	    year = expand_year (v3, n3);
> > +	    mon = v1;
> > +	    mday = v2;
> > +	} else if (n2 == 4 && n3 == 0) {
> > +	    /* M[M]/YYYY */
> > +	    year = v2;
> > +	    mon = v1;
> > +	} else {
> > +	    return -PARSE_TIME_ERR_DATEFORMAT;
> > +	}
> > +	break;
> > +
> > +    case '-': /* Date: YYYY-MM[-DD] or DD-MM[-YY[YY]] or MM-YYYY */
> > +	if (n1 == 4 && n2 == 2 && (n3 == 0 || n3 == 2)) {
> > +	    /* YYYY-MM[-DD] */
> > +	    year = v1;
> > +	    mon = v2;
> > +	    if (n3)
> > +		mday = v3;
> > +	} else if (n1 == 2 && n2 == 2 && (n3 == 0 || n3 == 2 || n3 == 4)) {
> > +	    /* DD-MM[-YY[YY]] */
> > +	    year = expand_year (v3, n3);
> > +	    mon = v2;
> > +	    mday = v1;
> > +	} else if (n1 == 2 && n2 == 4 && n3 == 0) {
> > +	    /* MM-YYYY */
> > +	    year = v2;
> > +	    mon = v1;
> > +	} else {
> > +	    return -PARSE_TIME_ERR_DATEFORMAT;
> > +	}
> > +	break;
> > +
> > +    case '.': /* Date: D[D].M[M][.[YY[YY]]] */
> > +	if ((n1 != 1 && n1 != 2) || (n2 != 1 && n2 != 2) ||
> > +	    (n3 != 0 && n3 != 2 && n3 != 4))
> > +	    return -PARSE_TIME_ERR_DATEFORMAT;
> > +
> > +	year = expand_year (v3, n3);
> > +	mon = v2;
> > +	mday = v1;
> > +	break;
> > +    }
> > +
> > +    if (year != UNSET && year < 1970)
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +
> > +    if (mon != UNSET && (mon < 1 || mon > 12))
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +
> > +    if (mday != UNSET && (mday < 1 || mday > 31))
> > +	return -PARSE_TIME_ERR_INVALIDDATE;
> > +
> > +    return set_abs_date (state, year, mon, mday);
> > +}
> > +
> > +static int
> > +parse_time (struct state *state, char sep,
> > +	    unsigned long v1, unsigned long v2, unsigned long v3,
> > +	    size_t n1, size_t n2, size_t n3)
> > +{
> > +    assert (is_time_sep (sep));
> > +
> > +    if ((n1 != 1 && n1 != 2) || n2 != 2 || (n3 != 0 && n3 != 2))
> > +	return -PARSE_TIME_ERR_TIMEFORMAT;
> > +
> > +    /*
> > +     * REVISIT: this means it's required to set time *before* being
> > +     * able to set timezone
> > +     */
> > +    if (is_field_set (state, TM_ABS_HOUR) &&
> > +	is_field_set (state, TM_ABS_MIN) &&
> > +	n1 == 2 && n2 == 2 && n3 == 0 &&
> > +	(state->delim == '+' || state->delim == '-')) {
> > +	return set_user_tz (state, state->delim, v1, v2);
> > +    }
> > +
> > +    if (v1 > 24 || v2 > 60 || v3 > 60)
> > +	return -PARSE_TIME_ERR_INVALIDTIME;
> 
> Are the > rather than >= deliberate here (i.e. do you mean to allow 60
> for minutes or seconds)?

For hours it's deliberate (24:00 is the end of day), for minutes and
seconds it's accidental. Fixed.

> 
> > +
> > +    return set_abs_time (state, v1, v2, n3 ? v3 : 0);
> > +}
> > +
> > +/* strtoul helper that assigns length */
> > +static unsigned long
> > +strtoul_len (const char *s, const char **endp, size_t *len)
> > +{
> > +    unsigned long val = strtoul (s, (char **) endp, 10);
> > +
> > +    *len = *endp - s;
> > +    return val;
> > +}
> > +
> > +/*
> > + * Parse a (group of) number(s). Return < 0 on error, number of parsed
> > + * chars on success.
> > + */
> > +static ssize_t
> > +parse_number (struct state *state, const char *s)
> > +{
> > +    int r;
> > +    unsigned long v1, v2, v3 = 0;
> > +    size_t n1, n2, n3 = 0;
> > +    const char *p = s;
> > +    char sep;
> > +
> > +    v1 = strtoul_len (p, &p, &n1);
> > +
> > +    if (is_sep (*p) && isdigit ((unsigned char) *(p + 1))) {
> > +	sep = *p;
> > +	v2 = strtoul_len (p + 1, &p, &n2);
> > +    } else {
> > +	/* a single number */
> > +	r = parse_single_number (state, v1, n1);
> > +	if (r)
> > +	    return r;
> > +
> > +	return p - s;
> > +    }
> > +
> > +    /* a group of two or three numbers? */
> > +    if (*p == sep && isdigit ((unsigned char) *(p + 1)))
> > +	v3 = strtoul_len (p + 1, &p, &n3);
> > +
> > +    if (is_time_sep (sep))
> > +	r = parse_time (state, sep, v1, v2, v3, n1, n2, n3);
> > +    else
> > +	r = parse_date (state, sep, v1, v2, v3, n1, n2, n3);
> > +
> > +    if (r)
> > +	return r;
> > +
> > +    return p - s;
> > +}
> > +
> > +/*
> > + * Parse delimiter(s). Return < 0 on error, number of parsed chars on
> > + * success.
> > + */
> > +static ssize_t
> > +parse_delim (struct state *state, const char *s)
> > +{
> > +    const char *p = s;
> > +
> > +    /*
> > +     * REVISIT: any actions depending on the first delim after last
> > +     * field? what could it be?
> > +     */
> > +
> > +    /*
> > +     * skip non-alpha and non-digit, and store the last for further
> > +     * processing
> > +     */
> > +    while (*p && !isalnum ((unsigned char) *p)) {
> > +	set_delim (state, *p);
> > +	p++;
> > +    }
> > +
> > +    return p - s;
> > +}
> > +
> > +/*
> > + * Parse a date/time string. Return < 0 on error, number of parsed
> > + * chars on success.
> > + */
> > +static ssize_t
> > +parse_input (struct state *state, const char *s)
> > +{
> > +    const char *p = s;
> > +    ssize_t n;
> > +    int r;
> > +
> > +    while (*p) {
> > +	if (isalpha ((unsigned char) *p)) {
> > +	    n = parse_keyword (state, p);
> > +	} else if (isdigit ((unsigned char) *p)) {
> > +	    n = parse_number (state, p);
> > +	} else {
> > +	    n = parse_delim (state, p);
> > +	}
> > +
> > +	if (n <= 0) {
> > +	    if (n == 0)
> > +		n = -PARSE_TIME_ERR;
> > +
> > +	    return n;             /* FIXME */
> > +	}
> > +
> > +	p += n;
> > +    }
> > +
> > +    /* parse postponed number, if any */
> > +    r = handle_postponed_number (state);
> > +    if (r < 0)
> > +	return r;
> > +
> > +    return p - s;
> > +}
> > +
> > +/*
> > + * Processing the parsed input.
> > + */
> > +
> > +/*
> > + * Initialize reference time to tm. Use time zone in state if
> > + * specified, otherwise local time. Use now for reference time if
> > + * non-NULL, otherwise current time.
> > + */
> > +static int
> > +initialize_now (struct state *state, struct tm *tm, const time_t *now)
> > +{
> > +    time_t t;
> > +
> > +    if (now) {
> > +	t = *now;
> > +    } else {
> > +	if (time (&t) == (time_t) -1)
> > +	    return -PARSE_TIME_ERR_LIB;
> > +    }
> > +
> > +    if (is_field_set (state, TM_TZ)) {
> > +	/* some other time zone */
> > +
> > +	/* adjust now according to the TZ */
> > +	t += get_field (state, TM_TZ) * 60;
> > +
> > +	/* it's not gm, but this doesn't mess with the tz */
> > +	if (gmtime_r (&t, tm) == NULL)
> > +	    return -PARSE_TIME_ERR_LIB;
> > +    } else {
> > +	/* local time */
> > +	if (localtime_r (&t, tm) == NULL)
> > +	    return -PARSE_TIME_ERR_LIB;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Normalize tm according to mktime(3). Both mktime(3) and
> > + * localtime_r(3) use local time, but they cancel each other out here,
> > + * making this function agnostic to time zone.
> > + */
> > +static int
> > +normalize_tm (struct tm *tm)
> > +{
> > +    time_t t = mktime (tm);
> > +
> > +    if (t == (time_t) -1)
> > +	return -PARSE_TIME_ERR_LIB;
> > +
> > +    if (!localtime_r (&t, tm))
> > +	return -PARSE_TIME_ERR_LIB;
> > +
> > +    return 0;
> > +}
> > +
> > +/* Get field out of a struct tm. */
> > +static int
> > +tm_get_field (const struct tm *tm, enum field field)
> > +{
> > +    switch (field) {
> > +    case TM_ABS_SEC:	return tm->tm_sec;
> > +    case TM_ABS_MIN:	return tm->tm_min;
> > +    case TM_ABS_HOUR:	return tm->tm_hour;
> > +    case TM_ABS_MDAY:	return tm->tm_mday;
> > +    case TM_ABS_MON:	return tm->tm_mon + 1; /* 0- to 1-based */
> > +    case TM_ABS_YEAR:	return 1900 + tm->tm_year;
> > +    case TM_ABS_WDAY:	return tm->tm_wday;
> > +    case TM_ABS_ISDST:	return tm->tm_isdst;
> > +    default:
> > +	assert (false);
> > +	break;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/* Modify hour according to am/pm setting. */
> > +static int
> > +fixup_ampm (struct state *state)
> > +{
> > +    int hour, hdiff = 0;
> > +
> > +    if (!is_field_set (state, TM_AMPM))
> > +	return 0;
> > +
> > +    if (!is_field_set (state, TM_ABS_HOUR))
> > +	return -PARSE_TIME_ERR_TIMEFORMAT;
> > +
> > +    hour = get_field (state, TM_ABS_HOUR);
> > +    if (hour < 1 || hour > 12)
> > +	return -PARSE_TIME_ERR_INVALIDTIME;
> > +
> > +    if (get_field (state, TM_AMPM)) {
> > +	/* 12pm is noon */
> > +	if (hour != 12)
> > +	    hdiff = 12;
> > +    } else {
> > +	/* 12am is midnight, beginning of day */
> > +	if (hour == 12)
> > +	    hdiff = -12;
> > +    }
> > +
> > +    mod_field (state, TM_REL_HOUR, -hdiff);
> > +
> > +    return 0;
> > +}
> > +
> > +/* Combine absolute and relative fields, and round. */
> > +static int
> > +create_output (struct state *state, time_t *t_out, const time_t *tnow,
> > +	       int round)
> > +{
> > +    struct tm tm = { 0 };
> > +    struct tm now;
> > +    enum field f;
> > +    int r;
> > +    int week_round = PARSE_TIME_NO_ROUND;
> > +
> > +    r = initialize_now (state, &now, tnow);
> > +    if (r)
> > +	return r;
> > +
> > +    /* initialize uninitialized fields to now */
> > +    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> > +	if (state->set[f] == FIELD_NOW) {
> > +	    state->tm[f] = tm_get_field (&now, f);
> > +	    state->set[f] = FIELD_SET;
> > +	}
> > +    }
> > +
> > +    /*
> > +     * If MON is set but YEAR is not, refer to past month.
> > +     *
> > +     * REVISIT: Why are month/week special in this regard? What about
> > +     * mday, or time. Should refer to past.
> > +     */
> > +    if (is_field_set (state, TM_ABS_MON) &&
> > +	!is_field_set (state, TM_ABS_YEAR)) {
> > +	if (get_field (state, TM_ABS_MON) >= tm_get_field (&now, TM_ABS_MON))
> > +	    mod_field (state, TM_REL_YEAR, 1);
> > +    }
> > +
> > +    /*
> > +     * If WDAY is set but MDAY is not, we consider WDAY relative
> > +     *
> > +     * REVISIT: This fails on stuff like "two months ago monday"
> > +     * because two months ago wasn't the same day as today. Postpone
> > +     * until we know date?
> > +     */
> > +    if (is_field_set (state, TM_ABS_WDAY) &&
> > +	!is_field_set (state, TM_ABS_MDAY)) {
> > +	int wday = get_field (state, TM_ABS_WDAY);
> > +	int today = tm_get_field (&now, TM_ABS_WDAY);
> > +	int rel_days;
> > +
> > +	if (today > wday)
> > +	    rel_days = today - wday;
> > +	else
> > +	    rel_days = today + 7 - wday;
> > +
> > +	/* this also prevents special week rounding from happening */
> > +	mod_field (state, TM_REL_DAY, rel_days);
> > +
> > +	unset_field (state, TM_ABS_WDAY);
> > +    }
> > +
> > +    r = fixup_ampm (state);
> > +    if (r)
> > +	return r;
> > +
> > +    /*
> > +     * Iterate fields from least accurate to most accurate, and set
> > +     * unset fields according to requested rounding.
> > +     */
> > +    for (f = TM_ABS_SEC; f != TM_NONE; f = next_field (f)) {
> > +	if (round != PARSE_TIME_NO_ROUND) {
> > +	    enum field r = abs_to_rel_field (f);
> 
> The comment and the code seem to disagree on the ordering.

Thanks, fixed.

> 
> > +
> > +	    if (is_field_set (state, f) || is_field_set (state, r)) {
> > +		if (round >= PARSE_TIME_ROUND_UP)
> > +		    mod_field (state, r, -1);
> > +		round = PARSE_TIME_NO_ROUND; /* no more rounding */
> > +	    } else {
> > +		if (f == TM_ABS_MDAY &&
> > +		    is_field_set (state, TM_REL_WEEK)) {
> > +		    /* week is most accurate */
> > +		    week_round = round;
> > +		    round = PARSE_TIME_NO_ROUND;
> > +		} else {
> > +		    set_field (state, f, field_zero (f));
> > +		}
> > +	    }
> > +	}
> > +
> > +	if (!is_field_set (state, f))
> > +	    set_field (state, f, tm_get_field (&now, f));
> > +    }
> > +
> > +    /* special case: rounding with week accuracy */
> > +    if (week_round != PARSE_TIME_NO_ROUND) {
> > +	/* temporarily set more accurate fields to now */
> > +	set_field (state, TM_ABS_SEC, tm_get_field (&now, TM_ABS_SEC));
> > +	set_field (state, TM_ABS_MIN, tm_get_field (&now, TM_ABS_MIN));
> > +	set_field (state, TM_ABS_HOUR, tm_get_field (&now, TM_ABS_HOUR));
> > +	set_field (state, TM_ABS_MDAY, tm_get_field (&now, TM_ABS_MDAY));
> > +    }
> > +
> > +    /*
> > +     * set all fields. they may contain out of range values before
> > +     * normalization by mktime(3).
> > +     */
> > +    tm.tm_sec = get_field (state, TM_ABS_SEC) - get_field (state, TM_REL_SEC);
> > +    tm.tm_min = get_field (state, TM_ABS_MIN) - get_field (state, TM_REL_MIN);
> > +    tm.tm_hour = get_field (state, TM_ABS_HOUR) - get_field (state, TM_REL_HOUR);
> > +    tm.tm_mday = get_field (state, TM_ABS_MDAY) -
> > +		 get_field (state, TM_REL_DAY) - 7 * get_field (state, TM_REL_WEEK);
> > +    tm.tm_mon = get_field (state, TM_ABS_MON) - get_field (state, TM_REL_MON);
> > +    tm.tm_mon--; /* 1- to 0-based */
> > +    tm.tm_year = get_field (state, TM_ABS_YEAR) - get_field (state, TM_REL_YEAR) - 1900;
> > +
> > +    /*
> > +     * It's always normal time.
> > +     *
> > +     * REVISIT: This is probably not a solution that universally
> > +     * works. Just make sure DST is not taken into account. We don't
> > +     * want rounding to be affected by DST.
> > +     */
> > +    tm.tm_isdst = -1;
> > +
> > +    /* special case: rounding with week accuracy */
> > +    if (week_round != PARSE_TIME_NO_ROUND) {
> > +	/* normalize to get proper tm.wday */
> > +	r = normalize_tm (&tm);
> > +	if (r < 0)
> > +	    return r;
> > +
> > +	/* set more accurate fields back to zero */
> > +	tm.tm_sec = 0;
> > +	tm.tm_min = 0;
> > +	tm.tm_hour = 0;
> > +	tm.tm_isdst = -1;
> > +
> > +	/* monday is the true 1st day of week, but this is easier */
> > +	if (week_round <= PARSE_TIME_ROUND_DOWN)
> > +	    tm.tm_mday -= tm.tm_wday;
> > +	else
> > +	    tm.tm_mday += 7 - tm.tm_wday;
> > +    }
> > +
> > +    /* if TZ specified, convert from TZ to local time for mktime(3) */
> > +    if (is_field_set (state, TM_TZ)) {
> > +	time_t t = mktime (&tm);
> > +
> > +	/* from specified TZ to UTC */
> > +	tm.tm_min -= get_field (state, TM_TZ);
> > +
> > +	/* from UTC to local TZ (yes, it's hacky - FIXME) */
> > +	tm.tm_sec += difftime (mktime (localtime (&t)), mktime (gmtime (&t)));
> > +    }
> > +
> > +    /* FIXME: check return value, don't set if fail */
> > +    *t_out = mktime (&tm);
> > +
> > +    return 0;
> > +}
> > +
> > +/* internally, all errors are < 0. parse_time_string() returns errors > 0. */
> > +#define EXTERNAL_ERR(r) (-r)
> > +
> > +int
> > +parse_time_string (const char *s, time_t *t, const time_t *now, int round)
> > +{
> > +    struct state state = { { 0 } };
> > +    int r;
> > +
> > +    if (!s || !t)
> > +	return EXTERNAL_ERR (-PARSE_TIME_ERR);
> > +
> > +    r = parse_input (&state, s);
> > +    if (r < 0)
> > +	return EXTERNAL_ERR (r);
> > +
> > +    r = create_output (&state, t, now, round);
> > +    if (r < 0)
> > +	return EXTERNAL_ERR (r);
> > +
> > +    return 0;
> > +}
> > diff --git a/lib/parse-time-string.h b/lib/parse-time-string.h
> > new file mode 100644
> > index 0000000..50b7c6f
> > --- /dev/null
> > +++ b/lib/parse-time-string.h
> > @@ -0,0 +1,95 @@
> > +/*
> > + * parse time string - user friendly date and time parser
> > + * Copyright © 2012 Jani Nikula
> > + *
> > + * This program is free software: you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation, either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > + *
> > + * Author: Jani Nikula <jani@nikula.org>
> > + */
> > +
> > +#ifndef PARSE_TIME_STRING_H
> > +#define PARSE_TIME_STRING_H
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
> > +
> > +#include <time.h>
> > +
> > +/* return values for parse_time_string() */
> > +enum {
> > +    PARSE_TIME_OK = 0,
> > +    PARSE_TIME_ERR,		/* unspecified error */
> > +    PARSE_TIME_ERR_LIB,		/* library call failed */
> > +    PARSE_TIME_ERR_ALREADYSET,	/* attempt to set unit twice */
> > +    PARSE_TIME_ERR_FORMAT,	/* generic date/time format error */
> > +    PARSE_TIME_ERR_DATEFORMAT,	/* date format error */
> > +    PARSE_TIME_ERR_TIMEFORMAT,	/* time format error */
> > +    PARSE_TIME_ERR_INVALIDDATE,	/* date value error */
> > +    PARSE_TIME_ERR_INVALIDTIME,	/* time value error */
> > +    PARSE_TIME_ERR_KEYWORD,	/* unknown keyword */
> > +};
> > +
> > +/* round values for parse_time_string() */
> > +enum {
> > +    PARSE_TIME_ROUND_DOWN = -1,
> > +    PARSE_TIME_NO_ROUND = 0,
> > +    PARSE_TIME_ROUND_UP = 1,
> > +};
> > +
> > +/**
> > + * parse_time_string() - user friendly date and time parser
> > + * @s:		string to parse
> > + * @t:		pointer to time_t to store parsed time in
> > + * @now:	pointer to time_t containing reference date/time, or NULL
> > + * @round:	PARSE_TIME_NO_ROUND, PARSE_TIME_ROUND_DOWN, or
> > + *		PARSE_TIME_ROUND_UP
> > + *
> > + * Parse a date/time string 's' and store the parsed date/time result
> > + * in 't'.
> > + *
> > + * A reference date/time is used for determining the "date/time units"
> > + * (roughly equivalent to struct tm members) not specified by 's'. If
> > + * 'now' is non-NULL, it must contain a pointer to a time_t to be used
> > + * as reference date/time. Otherwise, the current time is used.
> > + *
> > + * If 's' does not specify a full date/time, the 'round' parameter
> > + * specifies if and how the result should be rounded as follows:
> > + *
> > + *   PARSE_TIME_NO_ROUND: All date/time units that are not specified
> > + *   by 's' are set to the corresponding unit derived from the
> > + *   reference date/time.
> > + *
> > + *   PARSE_TIME_ROUND_DOWN: All date/time units that are more accurate
> > + *   than the most accurate unit specified by 's' are set to the
> > + *   smallest valid value for that unit. Rest of the unspecified units
> > + *   are set as in PARSE_TIME_NO_ROUND.
> > + *
> > + *   PARSE_TIME_ROUND_UP: All date/time units that are more accurate
> > + *   than the most accurate unit specified by 's' are set to the
> > + *   smallest valid value for that unit. The most accurate unit
> > + *   specified by 's' is incremented by one (and this is rolled over
> > + *   to the less accurate units as necessary). Rest of the unspecified
> > + *   units are set as in PARSE_TIME_NO_ROUND.
> > + *
> > + * Return 0 (PARSE_TIME_OK) for successfully parsed date/time, or one
> > + * of PARSE_TIME_ERR_* on error. 't' is not modified on error.
> > + */
> > +int parse_time_string (const char *s, time_t *t, const time_t *now, int round);
> > +
> > +#ifdef __cplusplus
> > +}
> > +#endif
> > +
> > +#endif /* PARSE_TIME_STRING_H */
> > -- 
> > 1.7.5.4
> > 
> > _______________________________________________
> > notmuch mailing list
> > notmuch@notmuchmail.org
> > http://notmuchmail.org/mailman/listinfo/notmuch

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/2] natural language date range search
  2012-02-25 19:53   ` Jani Nikula
@ 2012-02-27  7:45     ` Tomi Ollila
  0 siblings, 0 replies; 8+ messages in thread
From: Tomi Ollila @ 2012-02-27  7:45 UTC (permalink / raw)
  To: Jani Nikula, notmuch

On Sat, 25 Feb 2012 21:53:27 +0200, Jani Nikula <jani@nikula.org> wrote:
> On Sat, 25 Feb 2012 17:05:44 +0200, Tomi Ollila <tomi.ollila@iki.fi> wrote:

[ ... ]

> > 
> > By seeing the thoughts thrown in IRC there seems to be plenty of things
> > to resolve until something like this is going to be available in stock
> > notmuch. In the meanwhile I provide some ideas into the soup; maybe
> > our collective mind can have some use of this.
> > 
> > 
> > Q: Could 'date:timestr' be converted to 'date:timestr..timestr' ?
> 
> AFAICT this would require the custom query parser.

So, maybe someday... :)

> > In this idea -<timestr> means relative time and <timestr> absolute
> > time. The time string consists of a number and a letter; assume
> > the above suggestion for date:timestr (<- == date:timestr..timestr)
> > Letters are s seconds  h hours  d days  w weeks  m months (more
> > useful than for minutes) and  y years.
> 
> I'll put it bluntly: show me the code! ;)

I would not have expected anything less ;)

> I'll comment below how your examples can be expressed with working code
> in this series, just for comparison, and to show what can be done with
> this.

Great! Those features suit my needs just fine; the last day is most often
needed, then perhaps I jump to one week. 5w is good for ~one month.
More "absolute" times (like last november / since last november) are so
seldom needed that writing a bit more is not an issue :D

[ ... ]

> 
> BR,
> Jani.

Tomi

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-02-27  7:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-19 22:55 [RFC PATCH 0/2] natural language date range search Jani Nikula
2012-02-19 22:55 ` [RFC PATCH 1/2] lib: add date/time parser Jani Nikula
2012-02-26  8:45   ` Mark Walters
2012-02-26 20:39     ` Jani Nikula
2012-02-19 22:55 ` [RFC PATCH 2/2] lib: add date range search Jani Nikula
2012-02-25 15:05 ` [RFC PATCH 0/2] natural language " Tomi Ollila
2012-02-25 19:53   ` Jani Nikula
2012-02-27  7:45     ` Tomi Ollila

Code repositories for project(s) associated with this public inbox

	https://yhetil.org/notmuch.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).