diff --git a/lisp/calendar/parse-date.el b/lisp/calendar/parse-date.el new file mode 100644 index 0000000000..10bd939e91 --- /dev/null +++ b/lisp/calendar/parse-date.el @@ -0,0 +1,472 @@ +;;; parse-date.el --- parsing time/date strings -*- lexical-binding: t -*- + +;; Copyright (C) 2021 Free Software Foundation, Inc. + +;; Author: Bob Rogers +;; Keywords: util + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;; 'parse-date' parses a time and/or date in a string and returns a +;; list of values, just like `decode-time', where unspecified elements +;; in the string are returned as nil (except unspecified DST is +;; returned as -1). `encode-time' may be applied on these values to +;; obtain an internal time value. If left to its own devices, it +;; accepts a wide variety of formats, but can be told to insist on a +;; particular date/time format. + +;; Historically, `parse-time-string' was used for this purpose, but it +;; was focused on email date formats, and gradually but imperfectly +;; extended to handle other formats. 'parse-date' is compatible in +;; that it parses the same input formats and uses the same return +;; value format, but is stricter in that it signals an error for +;; tokens that `parse-time-string' would simply ignore. + +;;; TODO: +;; +;; * Add a euro-date format for DD/MM/YYYY ? +;; + +;;; Code: + +(require 'cl-lib) +(require 'iso8601) +(require 'parse-time) + +(define-error 'date-parse-error "Date/time parse error" 'error) + +(defconst parse-date--ends-with-alpha-tz-re + (concat " \\(" (mapconcat #'car parse-time-zoneinfo "\\|") "\\)$") + "Recognize an alphanumeric timezone at the end of the string.") + +(defun parse-date--guess-rfc822-formats (date-string) + (let ((case-fold-search t)) + (cond ((string-match "(" date-string) 'rfc2822) + ((string-match parse-date--ends-with-alpha-tz-re date-string) + ;; Alphabetic timezones are legacy syntax. + 'rfc822) + ((string-match " [-+][0-9][0-9][0-9][0-9][ \t\n]*\\($\\}(\\)" + date-string) + ;; Note that an ISO-8601 timezone has a colon in the middle + ;; and no preceding space. + 'rfc2822) + (t nil)))) + +(defun parse-date--guess-format (date-string) + (cond ((iso8601-valid-p date-string) 'iso-8601) + ((parse-date--guess-rfc822-formats date-string)) + (t nil))) + +(defun parse-date--ignore-char? (char) + ;; Ignore whitespace and commas. + (or (eq char ?\ ) (eq char ?\t) (eq char ?\r) (eq char ?\n) (eq char ?,))) + +(defun parse-date--tokenize-string (string &optional strip-fws?) + "Turn STRING into tokens, separated only by whitespace and commas. +Multiple commas are ignored. Pure digit sequences are turned +into integers. If STRIP-FWS? is true, then folding whitespace as +defined by RFC2822 (strictly, the CFWS production that also +accepts comments) is stripped out by treating it like whitespace; +if it's value is the symbol `first', we exit when we see the +first '(' (per RFC2822), else we strip them all (per RFC822)." + (let ((index 0) + (end (length string)) + (fws-eof? (eq strip-fws? 'first)) + (list ())) + (when fws-eof? + ;; In order to stop on the first "(", we need to see it as + ;; non-whitespace. + (setq strip-fws? nil)) + (cl-flet ((skip-ignored () + ;; Skip ignored characters at index (the scan + ;; position). Skip RFC822 comments in matched parens + ;; if strip-fws? is true, but do not complain about + ;; unterminated comments. + (let ((char nil) + (nest 0)) + (while (and (< index end) + (setq char (aref string index)) + (or (> nest 0) + (parse-date--ignore-char? char) + (and strip-fws? (eql char ?\()))) + (cl-incf index) + ;; FWS bookkeeping. + (cond ((not strip-fws?)) + ((and (eq char ?\\) + (< (1+ index) end)) + ;; Move to the next char but don't check + ;; it to see if it might be a paren. + (cl-incf index)) + ((eq char ?\() (cl-incf nest)) + ((eq char ?\)) (cl-decf nest))))))) + (skip-ignored) ;; Skip leading whitespace. + (while (and (< index end) + (not (and fws-eof? + (eq (aref string index) ?\()))) + (let* ((start index) + (char (aref string index)) + (all-digits (<= ?0 char ?9))) + ;; char is valid; look for more valid characters. + (when (and strip-fws? + (eq char ?\\) + (< (1+ index) end)) + ;; Escaped character, which might be a "(". If so, we are + ;; correct to include it in the token, even though the + ;; caller is sure to barf. If not, we violate RFC2?822 by + ;; not removing the backslash, but no characters in valid + ;; RFC2?822 dates need escaping anyway, so it shouldn't + ;; matter that this is not done strictly correctly. -- + ;; rgr, 24-Dec-21. + (cl-incf index)) + (while (and (< (cl-incf index) end) + (setq char (aref string index)) + (not (or (parse-date--ignore-char? char) + (and strip-fws? + (eq char ?\())))) + (unless (<= ?0 char ?9) + (setq all-digits nil)) + (when (and strip-fws? + (eq char ?\\) + (< (1+ index) end)) + ;; Escaped character, see above. + (cl-incf index))) + (push (if all-digits + (cl-parse-integer string :start start :end index) + (substring string start index)) + list) + (skip-ignored))) + (nreverse list)))) + +(defconst parse-date--slot-names + '(second minute hour day month year weekday dst zone) + "Names of return value slots, for better error messages +See the decoded-time defstruct.") + +(defconst parse-date--slot-ranges + '((0 60) (0 59) (0 23) (1 31) (1 12) (1 9999)) + "Numeric slot ranges, for bounds checking. +Note that RFC2822 explicitly requires that seconds go up to 60, +to allow for leap seconds (see Mills, D., 'Network Time +Protocol', STD 12, RFC 1119, September 1989).") + +(defun parse-date--x822 (time-string obs-format?) + ;; Parse an RFC2822 or (if obs-format? is true) RFC822 date. The + ;; strict syntax for the former is as follows: + ;; + ;; [ day-of-week "," ] day FWS month-name FWS year FWS time [CFWS] + ;; + ;; where "time" is: + ;; + ;; 2DIGIT ":" 2DIGIT [ ":" 2DIGIT ] FWS ( "+" / "-" ) 4DIGIT + ;; + ;; RFC822 also accepts comments in random places (which is handled + ;; by parse-date--tokenize-string) and two-digit years. We are + ;; somewhat more lax in what we accept (specifically, the hours + ;; don't have to be two digits, and the TZ and the comma after the + ;; DOW are optional), but we do insist that the items that are + ;; present do appear in this order. + (let ((tokens (parse-date--tokenize-string (downcase time-string) + (if obs-format? 'all 'first))) + (time (list nil nil nil nil nil nil nil -1 nil))) + (cl-labels ((set-matched-slot (slot index token) + ;; Assign a slot value from match data if index is + ;; non-nil, else from token, signalling an error if + ;; it's already been assigned or is out of range. + (let ((value (if index + (cl-parse-integer (match-string index token)) + token)) + (range (nth slot parse-date--slot-ranges))) + (unless (equal (nth slot time) + (if (= slot 7) -1 nil)) + (signal 'date-parse-error + (list "Duplicate slot value" + (nth slot parse-date--slot-names) token))) + (when (and range + (not (<= (car range) value (cadr range)))) + (signal 'date-parse-error + (list "Slot out of range" + (nth slot parse-date--slot-names) + token (car range) (cadr range)))) + (setf (nth slot time) value))) + (set-numeric (slot token) + (unless (natnump token) + (signal 'date-parse-error + (list "Not a number" + (nth slot parse-date--slot-names) token))) + (set-matched-slot slot nil token))) + ;; Check for weekday. + (let ((dow (assoc (car tokens) parse-time-weekdays))) + (when dow + ;; Day of the week. + (set-matched-slot 6 nil (cdr dow)) + (pop tokens))) + ;; Day. + (set-numeric 3 (pop tokens)) + ;; Alphabetic month. + (let* ((month (pop tokens)) + (match (assoc month parse-time-months))) + (if match + (set-matched-slot 4 nil (cdr match)) + (signal 'date-parse-error + (list "Expected an alphabetic month" month)))) + ;; Year. + (let ((year (pop tokens))) + ;; Check the year for the right number of digits. + (cond ((> year 1000) + (set-numeric 5 year)) + ((or (not obs-format?) + (>= year 100)) + "Four digit years are required but found '%s'" year) + ((>= year 50) + ;; second half of the 20th century. + (set-numeric 5 (+ 1900 year))) + (t + ;; first half of the 21st century. + (set-numeric 5 (+ 2000 year))))) + ;; Time. + (let ((time (pop tokens))) + (cond ((or (null time) (natnump time)) + (signal 'date-parse-error + (list "Expected a time" time))) + ((string-match + "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" + time) + (set-matched-slot 2 1 time) + (set-matched-slot 1 2 time) + (set-matched-slot 0 3 time)) + ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" time) + ;; Time without seconds. + (set-matched-slot 2 1 time) + (set-matched-slot 1 2 time) + (set-matched-slot 0 nil 0)) + (t + (signal 'date-parse-error + (list "Expected a time" time))))) + ;; Timezone. + (let* ((zone (pop tokens)) + (match (assoc zone parse-time-zoneinfo))) + (cond (match + (set-matched-slot 8 nil (cadr match)) + (set-matched-slot 7 nil (caddr match))) + ((and (stringp zone) + (string-match "^[-+][0-9][0-9][0-9][0-9]$" zone)) + ;; Numeric time zone. + (set-matched-slot + 8 nil + (* 60 + (+ (cl-parse-integer zone :start 3 :end 5) + (* 60 (cl-parse-integer zone :start 1 :end 3))) + (if (= (aref zone 0) ?-) -1 1)))) + (zone + (signal 'date-parse-error + (list "Expected a timezone" zone))))) + (when tokens + (signal 'date-parse-error + (list "Extra token(s)" (car tokens))))) + time)) + +(defun parse-date--default (time-string two-digit-year?) + ;; Do the standard parsing thing. This is mostly free form, in that + ;; tokens may appear in any order, but we expect to introduce some + ;; state dependence. + (let ((tokens (parse-date--tokenize-string (downcase time-string))) + (time (list nil nil nil nil nil nil nil -1 nil))) + (cl-flet ((set-matched-slot (slot index token) + ;; Assign a slot value from match data if index is + ;; non-nil, else from token, signalling an error if + ;; it's already been assigned or is out of range. + (let ((value (if index + (cl-parse-integer (match-string index token)) + token)) + (range (nth slot parse-date--slot-ranges))) + (unless (equal (nth slot time) + (if (= slot 7) -1 nil)) + (signal 'date-parse-error + (list "Duplicate slot value" + (nth slot parse-date--slot-names) token))) + (when (and range + (not (<= (car range) value (cadr range)))) + (signal 'date-parse-error + (list "Slot out of range" + (nth slot parse-date--slot-names) + token (car range) (cadr range)))) + (setf (nth slot time) value)))) + (while tokens + (let ((token (pop tokens)) + (match nil)) + (cond ((numberp token) + ;; A bare number could be a month, day, or year. + ;; The order of these tests matters greatly. + (cond ((>= token 1000) + (set-matched-slot 5 nil token)) + ((and (<= 1 token 31) + (not (nth 3 time))) + ;; Assume days come before months or years. + (set-matched-slot 3 nil token)) + ((and (<= 1 token 12) + (not (nth 4 time))) + ;; Assume days come before years. + (set-matched-slot 4 nil token)) + ((or (nth 5 time) + (not two-digit-year?) + (> token 100)) + (signal 'date-parse-error + (list "Unrecognized token" token))) + ;; It's a two-digit year. + ((>= token 50) + ;; second half of the 20th century. + (set-matched-slot 5 nil (+ 1900 token))) + (t + ;; first half of the 21st century. + (set-matched-slot 5 nil (+ 2000 token))))) + ((setq match (assoc token parse-time-weekdays)) + (set-matched-slot 6 nil (cdr match))) + ((setq match (assoc token parse-time-months)) + (set-matched-slot 4 nil (cdr match))) + ((setq match (assoc token parse-time-zoneinfo)) + (set-matched-slot 8 nil (cadr match)) + (set-matched-slot 7 nil (caddr match))) + ((string-match "^[-+][0-9][0-9][0-9][0-9]$" token) + ;; Numeric time zone. + (set-matched-slot + 8 nil + (* 60 + (+ (cl-parse-integer token :start 3 :end 5) + (* 60 (cl-parse-integer token :start 1 :end 3))) + (if (= (aref token 0) ?-) -1 1)))) + ((string-match + "^\\([0-9][0-9][0-9][0-9]\\)[-/]\\([0-9][0-9]?\\)[-/]\\([0-9][0-9]?\\)$" + token) + ;; ISO-8601-style date (YYYY-MM-DD). + (set-matched-slot 5 1 token) + (set-matched-slot 4 2 token) + (set-matched-slot 3 3 token)) + ((string-match + "^\\([0-9][0-9]?\\)[-/]\\([0-9][0-9]?\\)[-/]\\([0-9][0-9][0-9][0-9]\\)$" + token) + ;; US date (MM-DD-YYYY), but we insist on four + ;; digits for the year. + (set-matched-slot 4 1 token) + (set-matched-slot 3 2 token) + (set-matched-slot 5 3 token)) + ((string-match + "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\):\\([0-9][0-9]\\)$" + token) + (set-matched-slot 2 1 token) + (set-matched-slot 1 2 token) + (set-matched-slot 0 3 token)) + ((string-match "^\\([0-9][0-9]?\\):\\([0-9][0-9]\\)$" token) + ;; Time without seconds. + (set-matched-slot 2 1 token) + (set-matched-slot 1 2 token) + (set-matched-slot 0 nil 0)) + ((member token '("am" "pm")) + (unless (nth 2 time) + (signal 'date-parse-error + (list "Missing time" token))) + (unless (<= (nth 2 time) 12) + (signal 'date-parse-error + (list "Time already past noon" token))) + (when (equal token "pm") + (cl-incf (nth 2 time) 12))) + (t + (signal 'date-parse-error + (list "Unrecognized token" token))))))) + time)) + +;;;###autoload +(cl-defgeneric parse-date (time-string &optional format) + "Parse TIME-STRING according to FORMAT, returning a list. +The FORMAT value is a symbol that may be one of the following: + + iso-8601 => parse the string according to the ISO-8601 +standard. See `parse-iso8601-time-string'. + + rfc822 => parse an RFC822 (old email) date, which allows +two-digit years and internal '()' comments. In dates of the form +'11 Jan 12', the 11 is assumed to be the day, and the 12 is +assumed to mean 2012. Be sure you really want this; the format +is more limited than most human-supplied dates. + + rfc2822 => parse an RFC2822 (new email) date, which allows +only four-digit years. Again, this is a fairly restricted +format, with fields required to be in a specified order and +representation. + + us-date => parse a US-style date, of the form MM/DD/YYYY, but +allowing two-digit years. In dates of the form '01/11/12', the 1 +is the month, 11 is the day, and the 12 is assumed to mean 2012. + + nil => like us-date with two-digit years disallowed. + +Anything else is treated as iso-8601 if it looks similar, else +us-date with two-digit years disallowed. + + * For all formats except iso-8601, parsing is case-insensitive. + + * Commas and whitespace are ignored. + + * In date specifications, either '/' or '-' may be used to +separate components, but all three components must be given. + + * A date that starts with four digits is YYYY-MM-DD, ISO-8601 +style, but a date that ends with four digits is MM-DD-YYYY [at +least in us-date format]. + + * Two digit years, when allowed, are in the 1900's when +between 50 and 99 inclusive and in the 2000's when between 0 and +49 inclusive. + +A `date-parse-error' is signalled when time values are duplicated, +unrecognized, or out of range. No consistency checks between +fields are done. For instance, the weekday is not checked to see +that it corresponds to the date, and parse-date complains about +the 32nd of March (or any other month) but blithely accepts the +29th of February in non-leap years -- or the 31st of February in +any year. + +The result is a list of (SEC MIN HOUR DAY MON YEAR DOW DST TZ), +which can be accessed as a decoded-time defstruct (q.v.), +e.g. `decoded-time-year' to extract the year, and turned into an +Emacs timestamp by `encode-time'. The values returned are +identical to those of `decode-time', but any unknown values other +than DST are returned as nil, and an unknown DST value is +returned as -1.") + +(cl-defmethod parse-date (time-string (_format (eql iso-8601))) + (iso8601-parse time-string)) + +(cl-defmethod parse-date (time-string (_format (eql rfc2822))) + (parse-date--x822 time-string nil)) + +(cl-defmethod parse-date (time-string (_format (eql rfc822))) + (parse-date--x822 time-string t)) + +(cl-defmethod parse-date (time-string (_format (eql us-date))) + (parse-date--default time-string t)) + +(cl-defmethod parse-date (time-string (_format (eql nil))) + (parse-date--default time-string nil)) + +(cl-defmethod parse-date (time-string _format) + ;; Re-dispatch after guessing the format. + (parse-date time-string (parse-date--guess-format time-string))) + +(provide 'parse-date) + +;;; parse-date.el ends here diff --git a/test/lisp/calendar/parse-date-tests.el b/test/lisp/calendar/parse-date-tests.el new file mode 100644 index 0000000000..bd2b344d71 --- /dev/null +++ b/test/lisp/calendar/parse-date-tests.el @@ -0,0 +1,247 @@ +;;; parse-date-tests.el --- Test suite for parse-date.el -*- lexical-binding:t -*- + +;; Copyright (C) 2016-2021 Free Software Foundation, Inc. + +;; Author: Lars Ingebrigtsen + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;;; Code: + +(require 'ert) +(require 'parse-date) + +(ert-deftest parse-date-tests () + "Test basic parse-date functionality." + + ;; Test tokenization. + (should (equal (parse-date--tokenize-string " ") '())) + (should (equal (parse-date--tokenize-string " a b") '("a" "b"))) + (should (equal (parse-date--tokenize-string "a bbc dde") '("a" "bbc" "dde"))) + (should (equal (parse-date--tokenize-string " , a 27 b,, c 14:32 ") + '("a" 27 "b" "c" "14:32"))) + ;; Some folding whitespace tests. + (should (equal (parse-date--tokenize-string " a b (end) c" 'first) + '("a" "b"))) + (should (equal (parse-date--tokenize-string "(quux)a (foo (bar)) b(baz)" t) + '("a" "b"))) + (should (equal (parse-date--tokenize-string "a b\\cde" 'all) + ;; Strictly incorrect, but strictly unnecessary syntax. + '("a" "b\\cde"))) + (should (equal (parse-date--tokenize-string "a b\\ de" 'all) + '("a" "b\\ de"))) + (should (equal (parse-date--tokenize-string "a \\de \\(f" 'all) + '("a" "\\de" "\\(f"))) + + ;; Start with some compatible RFC822 dates. + (dolist (format '(nil rfc822 rfc2822)) + (should (equal (parse-date "Mon, 22 Feb 2016 19:35:42 +0100" format) + '(42 35 19 22 2 2016 1 -1 3600))) + (should (equal (parse-date "22 Feb 2016 19:35:42 +0100" format) + '(42 35 19 22 2 2016 nil -1 3600))) + (should (equal (parse-date "Mon, 22 February 2016 19:35:42 +0100" format) + '(42 35 19 22 2 2016 1 -1 3600))) + (should (equal (parse-date "Mon, 22 feb 2016 19:35:42 +0100" format) + '(42 35 19 22 2 2016 1 -1 3600))) + (should (equal (parse-date "Monday, 22 february 2016 19:35:42 +0100" format) + '(42 35 19 22 2 2016 1 -1 3600))) + (should (equal (parse-date "Monday, 22 february 2016 19:35:42 PST" format) + '(42 35 19 22 2 2016 1 nil -28800))) + (should (equal (parse-date "Friday, 21 Sep 2018 13:47:58 PDT" format) + '(58 47 13 21 9 2018 5 t -25200))) + (should (equal (parse-date "Friday, 21 Sep 2018 13:47:58" format) + '(58 47 13 21 9 2018 5 -1 nil)))) + ;; These are not allowed by the default format. + (should (equal (parse-date "22 Feb 16 19:35:42 +0100" 'rfc822) + '(42 35 19 22 2 2016 nil -1 3600))) + (should (equal (parse-date "22 Feb 96 19:35:42 +0100" 'rfc822) + '(42 35 19 22 2 1996 nil -1 3600))) + ;; Try them again with comments. + (should (equal (parse-date "22 Feb (today) 16 19:35:42 +0100" 'rfc822) + '(42 35 19 22 2 2016 nil -1 3600))) + (should (equal (parse-date "22 Feb 96 (long ago) 19:35:42 +0100" 'rfc822) + '(42 35 19 22 2 1996 nil -1 3600))) + (should (equal (parse-date + "Friday, 21 Sep(comment \\) with \\( parens)18 19:35:42" + 'rfc822) + '(42 35 19 21 9 2018 5 -1 nil))) + (should (equal (parse-date + "Friday, 21 Sep 18 19:35:42 (unterminated comment" + 'rfc822) + '(42 35 19 21 9 2018 5 -1 nil))) + + ;; Test some RFC822 error cases + (dolist (test '(("33 1 2022" ("Slot out of range" day 33 1 31)) + ("0 1 2022" ("Slot out of range" day 0 1 31)) + ("1 1 2020 2021" ("Expected an alphabetic month" 1)) + ("1 Jan 2020 2021" ("Expected a time" 2021)) + ("1 Jan 2020 20:21 2000" ("Expected a timezone" 2000)) + ("1 Jan 2020 20:21 +0200 33" ("Extra token(s)" 33)))) + (should (equal (condition-case err (parse-date (car test) 'rfc822) + (date-parse-error (cdr err))) + (cadr test)))) + + ;; And these are not allowed by rfc822 because of missing time. + (should (equal (parse-date "Friday, 21 Sep 2018" nil) + '(nil nil nil 21 9 2018 5 -1 nil))) + (should (equal (parse-date "22 Feb 2016 +0100" nil) + '(nil nil nil 22 2 2016 nil -1 3600))) + + ;; Test the default format with both hyphens and slashes in dates. + (dolist (case '(;; Month can be numeric if date uses hyphens/slashes. + ("Friday, 2018-09-21" (nil nil nil 21 9 2018 5 -1 nil)) + ;; Year can come last if four digits. + ("Friday, 9-21-2018" (nil nil nil 21 9 2018 5 -1 nil)) + ;; Day of week is optional + ("2018-09-21" (nil nil nil 21 9 2018 nil -1 nil)) + ;; The order of date, time, etc., does not matter. + ("13:47:58, +0100, 2018-09-21, Friday" + (58 47 13 21 9 2018 5 -1 3600)) + ;; Month, day, or both, can be a single digit. + ("Friday, 2018-9-08" (nil nil nil 8 9 2018 5 -1 nil)) + ("Friday, 2018-09-8" (nil nil nil 8 9 2018 5 -1 nil)) + ("Friday, 2018-9-8" (nil nil nil 8 9 2018 5 -1 nil)))) + (let ((string (car case)) + (expected (cadr case))) + ;; Test with hyphens. + (should (equal (parse-date string nil) expected)) + (while (string-match "-" string) + (setq string (replace-match "/" t t string))) + ;; Test with slashes. + (should (equal (parse-date string nil) expected)))) + + ;; Time by itself is recognized as such. + (should (equal (parse-date "03:47:58" nil) + '(58 47 3 nil nil nil nil -1 nil))) + ;; A leading zero for hours is optional. + (should (equal (parse-date "3:47:58" nil) + '(58 47 3 nil nil nil nil -1 nil))) + ;; Missing seconds are assumed to be zero. + (should (equal (parse-date "3:47" nil) + '(0 47 3 nil nil nil nil -1 nil))) + ;; AM/PM are understood (in any case combination). + (dolist (am '(am AM Am)) + (should (equal (parse-date (format "3:47 %s" am) nil) + '(0 47 3 nil nil nil nil -1 nil)))) + (dolist (pm '(pm PM Pm)) + (should (equal (parse-date (format "3:47 %s" pm) nil) + '(0 47 15 nil nil nil nil -1 nil)))) + + ;; Ensure some cases fail. + (should-error (parse-date "22 Feb 196" 'us-date)) + (should-error (parse-date "22 Feb 16 19:35:42" nil)) + (should-error (parse-date "22 Feb 96 19:35:42" nil)) ;; two-digit year + (should-error (parse-date "2 Feb 2021 1996" nil)) ;; duplicate year + + (dolist (test '(("22 Feb 196" 'us-date ;; bad year + ("Unrecognized token" 196)) + ("22 Feb 16 19:35:42" nil ;; two-digit year + ("Unrecognized token" 16)) + ("22 Feb 96 19:35:42" nil ;; two-digit year + ("Unrecognized token" 96)) + ("2 Feb 2021 1996" nil + ("Duplicate slot value" year 1996)) + ("2020-1-1 2021" nil + ("Duplicate slot value" year 2021)) + ("22 Feb 196" 'us-date + ("Unrecognized token" 196)) + ("22 Feb 16 19:35:42" nil + ("Unrecognized token" 16)) + ("22 Feb 96 19:35:42" nil + ("Unrecognized token" 96)) + ("2 Feb 2021 1996" nil + ("Duplicate slot value" year 1996)) + ("2020-1-1 30" nil + ("Unrecognized token" 30)) + ("2020-1-1 12" nil + ("Unrecognized token" 12)) + ("15:47 15:15" nil + ("Duplicate slot value" hour "15:15")) + ("2020-1-1 +0800 -0800" t + ("Duplicate slot value" zone -28800)) + ("15:47 PM" nil + ("Time already past noon" "pm")) + ("15:47 AM" nil + ("Time already past noon" "am")) + ("2020-1-1 PM" nil + ("Missing time" "pm")) + ;; Range tests + ("2021-12-32" nil + ("Slot out of range" day "2021-12-32" 1 31)) + ("2021-12-0" nil + ("Slot out of range" day "2021-12-0" 1 31)) + ("2021-13-3" nil + ("Slot out of range" month "2021-13-3" 1 12)) + ("0000-12-3" nil + ("Slot out of range" year "0000-12-3" 1 9999)) + ("20021 Dec 3" nil + ("Slot out of range" year 20021 1 9999)) + ("24:21:14" nil + ("Slot out of range" hour "24:21:14" 0 23)) + ("14:60:21" nil + ("Slot out of range" minute "14:60:21" 0 59)) + ("14:21:61" nil + ("Slot out of range" second "14:21:61" 0 60)))) + (should (equal (condition-case err (parse-date (car test) (cadr test)) + (date-parse-error (cdr err))) + (caddr test)))) + (should (equal (parse-date "14:21:60" nil) ;; a leap second! + '(60 21 14 nil nil nil nil -1 nil))) + + ;; Test ISO-8601 dates. + (dolist (format '(t iso-8601)) + (should (equal (parse-date "1998-09-12T12:21:54-0200" format) + '(54 21 12 12 9 1998 nil nil -7200))) + (should (equal (format-time-string + "%Y-%m-%d %H:%M:%S" + (encode-time + (parse-date "1998-09-12T12:21:54-0230" format)) + t) + "1998-09-12 14:51:54")) + (should (equal (format-time-string + "%Y-%m-%d %H:%M:%S" + (encode-time + (parse-date "1998-09-12T12:21:54-02:00" format)) + t) + "1998-09-12 14:21:54")) + (should (equal (format-time-string + "%Y-%m-%d %H:%M:%S" + (encode-time + (parse-date "1998-09-12T12:21:54-02" format)) + t) + "1998-09-12 14:21:54")) + (should (equal (format-time-string + "%Y-%m-%d %H:%M:%S" + (encode-time + (parse-date "1998-09-12T12:21:54+0230" format)) + t) + "1998-09-12 09:51:54")) + (should (equal (format-time-string + "%Y-%m-%d %H:%M:%S" + (encode-time + (parse-date "1998-09-12T12:21:54+02" format)) + t) + "1998-09-12 10:21:54")) + (should (equal (parse-date "1998-09-12T12:21:54Z" t) + '(54 21 12 12 9 1998 nil nil 0))) + (should (equal (parse-date "1998-09-12T12:21:54" format) + '(54 21 12 12 9 1998 nil -1 nil))))) + +(provide 'parse-date-tests) + +;;; parse-date-tests.el ends here