From 57556fa21da431b47b2f3842ddb61273f5d39820 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 4 Oct 2019 14:38:22 -0700 Subject: [PATCH] Fix bugs found by 2019-09-29 regexp scanner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problems reported by Mattias Engdegård in: https://lists.gnu.org/r/emacs-devel/2019-09/threads.html * lisp/calendar/iso8601.el (iso8601--year-match) (iso8601--full-date-match, iso8601--without-day-match) (iso8601--week-date-match, iso8601--ordinal-date-match) (iso8601--zone-match): * lisp/textmodes/rst.el (rst-re-alist-def): Put ‘-’ at the end of bracketed ranges, following the style suggestion in the Elisp manual. (iso8601--time-match): Use \([0-9]*\) instead of \([0-9]+\)? to pacify the regexp scanner. (iso8601-parse-time): Adjust accordingly. * lisp/language/burmese.el (burmese-composable-pattern): * lisp/language/indian.el (devanagari-composable-pattern) (bengali-composable-pattern, gurmukhi-composable-pattern) (gujarati-composable-pattern, oriya-composable-pattern) (telugu-composable-pattern, kannada-composable-pattern) (malayalam-composable-pattern): Prefer [ab] to [a-b] when the characters differ by 1, to pacify the regexp scanner. * lisp/language/burmese.el (burmese-composable-pattern): Fix missing-‘\u’ typos. * lisp/language/indian.el (gurmukhi-composable-pattern): Fix missing-‘\’ typo. * lisp/language/tibetan.el (tibetan-regexp): Quote ‘+’ in regexp to pacify the regexp scanner. Simplify. * lisp/textmodes/rst.el (rst-re-alist-def): Fix ‘[]-'...]’ typo by putting the ‘-’ at end of the bracketed expression. --- lisp/calendar/iso8601.el | 18 ++++++------- lisp/language/burmese.el | 4 +-- lisp/language/indian.el | 58 ++++++++++++++++++++-------------------- lisp/language/tibetan.el | 23 +++++++--------- lisp/textmodes/rst.el | 8 +++--- 5 files changed, 54 insertions(+), 57 deletions(-) diff --git a/lisp/calendar/iso8601.el b/lisp/calendar/iso8601.el index 3ff91d910c..78a94d47be 100644 --- a/lisp/calendar/iso8601.el +++ b/lisp/calendar/iso8601.el @@ -62,17 +62,17 @@ iso8601--concat-regexps regexps "\\|")) (defconst iso8601--year-match - "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)") + "\\([+-]\\)?\\([0-9][0-9][0-9][0-9]\\)") (defconst iso8601--full-date-match - "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)") + "\\([+-]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)") (defconst iso8601--without-day-match - "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)") + "\\([+-]\\)?\\([0-9][0-9][0-9][0-9]\\)-\\([0-9][0-9]\\)") (defconst iso8601--outdated-date-match "--\\([0-9][0-9]\\)-?\\([0-9][0-9]\\)") (defconst iso8601--week-date-match - "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?") + "\\([+-]\\)?\\([0-9][0-9][0-9][0-9]\\)-?W\\([0-9][0-9]\\)-?\\([0-9]\\)?") (defconst iso8601--ordinal-date-match - "\\([-+]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)") + "\\([+-]\\)?\\([0-9][0-9][0-9][0-9]\\)-?\\([0-9][0-9][0-9]\\)") (defconst iso8601--date-match (iso8601--concat-regexps (list iso8601--year-match @@ -83,10 +83,10 @@ iso8601--date-match iso8601--ordinal-date-match))) (defconst iso8601--time-match - "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?[.,]?\\([0-9]+\\)?") + "\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?:?\\([0-9][0-9]\\)?[.,]?\\([0-9]*\\)") (defconst iso8601--zone-match - "\\(Z\\|\\([-+]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)") + "\\(Z\\|\\([+-]\\)\\([0-9][0-9]\\):?\\([0-9][0-9]\\)?\\)") (defconst iso8601--full-time-match (concat "\\(" (replace-regexp-in-string "(" "(?:" iso8601--time-match) "\\)" @@ -142,7 +142,7 @@ iso8601-parse (defun iso8601-parse-date (string) "Parse STRING (in ISO 8601 format) and return a `decode-time' value." (cond - ;; Just a year: [-+]YYYY. + ;; Just a year: [+-]YYYY. ((iso8601--match iso8601--year-match string) (iso8601--decoded-time :year (iso8601--adjust-year (match-string 1 string) @@ -236,7 +236,7 @@ iso8601-parse-time (string-to-number (match-string 2 time)))) (second (and (match-string 3 time) (string-to-number (match-string 3 time)))) - (fraction (and (match-string 4 time) + (fraction (and (not (zerop (length (match-string 4 time)))) (string-to-number (match-string 4 time))))) (when (and fraction (eq form t)) diff --git a/lisp/language/burmese.el b/lisp/language/burmese.el index 25425ec485..7f2a99a41a 100644 --- a/lisp/language/burmese.el +++ b/lisp/language/burmese.el @@ -39,11 +39,11 @@ (defvar burmese-composable-pattern (let ((table '(("K" . "[\u1004\u105A]\u103A\u1039") ; KINZI sequence - ("C" . "[\u1000-\u102A\u103F\u1041-\u1049\u104E\u105A-\u105D\u1061\u1065-\u1066\u106E\u1071\u1075\u1081\u108E\uAA60-\uAA6F\uAA71-\uAA76]") ; consonant and vowel letter + ("C" . "[\u1000-\u102A\u103F\u1041-\u1049\u104E\u105A-\u105D\u1061\u1065\u1066\u106E\u1071\u1075\u1081\u108E\uAA60-\uAA6F\uAA71-\uAA76]") ; consonant and vowel letter ("V" . "\u1039") ; VIRAMA ("A" . "\u103A") ; ASAT ("S" . "[\u1000-\u1019\u101C\u101E\u1020\u1021\u105A]") ; subscript - ("M" . "[\u103B-\u103E\105E-\1060]") ; medial + ("M" . "[\u103B-\u103E\u105E-\u1060]") ; medial ("v" . "[\u102B-\u103A\u103C-\u103E\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A\u109C\uAA70]"))) ; vowel sign, etc. (regexp "\\(K\\)?C\\(VS\\)?\\(VS\\)?A?M*v*")) (let ((case-fold-search nil)) diff --git a/lisp/language/indian.el b/lisp/language/indian.el index f1e61a354c..4013faca7c 100644 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@ -139,14 +139,14 @@ devanagari-composable-pattern (let ((table '(("a" . "[\u0900-\u0902]") ; vowel modifier (above) ("A" . "\u0903") ; vowel modifier (post) - ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel + ("V" . "[\u0904-\u0914\u0960\u0961\u0972]") ; independent vowel ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant ("R" . "\u0930") ; RA ("n" . "\u093C") ; NUKTA - ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign + ("v" . "[\u093E-\u094C\u094E\u0955\u0962\u0963]") ; vowel sign ("H" . "\u094D") ; HALANT - ("s" . "[\u0951-\u0952]") ; stress sign - ("t" . "[\u0953-\u0954]") ; accent + ("s" . "[\u0951\u0952]") ; stress sign + ("t" . "[\u0953\u0954]") ; accent ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ ("X" . "[\u0900-\u097F]")))) ; all coverage @@ -166,13 +166,13 @@ devanagari-composable-pattern (defconst bengali-composable-pattern (let ((table '(("a" . "\u0981") ; SIGN CANDRABINDU - ("A" . "[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA - ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel + ("A" . "[\u0982\u0983]") ; SIGN ANUSVARA .. VISARGA + ("V" . "[\u0985-\u0994\u09E0\u09E1]") ; independent vowel ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant - ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA + ("B" . "[\u09AC\u09AF\u09B0\u09F0]") ; BA, YA, RA ("R" . "[\u09B0\u09F0]") ; RA ("n" . "\u09BC") ; NUKTA - ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign + ("v" . "[\u09BE-\u09CC\u09D7\u09E2\u09E3]") ; vowel sign ("H" . "\u09CD") ; HALANT ("T" . "\u09CE") ; KHANDA TA ("N" . "\u200C") ; ZWNJ @@ -195,11 +195,11 @@ bengali-composable-pattern (defconst gurmukhi-composable-pattern (let ((table - '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI + '(("a" . "[\u0A01\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI ("A" . "\u0A03") ; SIGN VISARGA ("V" . "[\u0A05-\u0A14]") ; independent vowel ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant - ("Y" . "[\u0A2F-u0A30\u0A35\u0A39]") ; YA, RA, VA, HA + ("Y" . "[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA ("n" . "\u0A3C") ; NUKTA ("v" . "[\u0A3E-\u0A4C]") ; vowel sign ("H" . "\u0A4D") ; VIRAMA @@ -221,13 +221,13 @@ gurmukhi-composable-pattern (defconst gujarati-composable-pattern (let ((table - '(("a" . "[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA + '(("a" . "[\u0A81\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA ("A" . "\u0A83") ; SIGN VISARGA - ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel + ("V" . "[\u0A85-\u0A94\u0AE0\u0AE1]") ; independent vowel ("C" . "[\u0A95-\u0AB9]") ; consonant ("R" . "\u0AB0") ; RA ("n" . "\u0ABC") ; NUKTA - ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign + ("v" . "[\u0ABE-\u0ACC\u0AE2\u0AE3]") ; vowel sign ("H" . "\u0ACD") ; VIRAMA ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ @@ -248,13 +248,13 @@ gujarati-composable-pattern (defconst oriya-composable-pattern (let ((table '(("a" . "\u0B01") ; SIGN CANDRABINDU - ("A" . "[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA - ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel - ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant - ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form + ("A" . "[\u0B02\u0B03]") ; SIGN ANUSVARA .. VISARGA + ("V" . "[\u0B05-\u0B14\u0B60\u0B61]") ; independent vowel + ("C" . "[\u0B15-\u0B39\u0B5C\u0B5D\u0B71]") ; consonant + ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38\u0B39]") ; consonant with below form ("R" . "\u0B30") ; RA ("n" . "\u0B3C") ; NUKTA - ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign + ("v" . "[\u0B3E-\u0B4C\u0B56\u0B57\u0B62\u0B63]") ; vowel sign ("H" . "\u0B4D") ; VIRAMA ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ @@ -296,9 +296,9 @@ tamil-composable-pattern (defconst telugu-composable-pattern (let ((table '(("a" . "[\u0C01-\u0C03]") ; SIGN CANDRABINDU .. VISARGA - ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel - ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant - ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign + ("V" . "[\u0C05-\u0C14\u0C60\u0C61]") ; independent vowel + ("C" . "[\u0C15-\u0C39\u0C58\u0C59]") ; consonant + ("v" . "[\u0C3E-\u0C4C\u0C55\u0C56\u0C62\u0C63]") ; vowel sign ("H" . "\u0C4D") ; VIRAMA ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ @@ -318,12 +318,12 @@ telugu-composable-pattern (defconst kannada-composable-pattern (let ((table - '(("A" . "[\u0C82-\u0C83]") ; SIGN ANUSVARA .. VISARGA - ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel - ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant + '(("A" . "[\u0C82\u0C83]") ; SIGN ANUSVARA .. VISARGA + ("V" . "[\u0C85-\u0C94\u0CE0\u0CE1]") ; independent vowel + ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant ("R" . "\u0CB0") ; RA ("n" . "\u0CBC") ; NUKTA - ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign + ("v" . "[\u0CBE-\u0CCC\u0CD5\u0CD6\u0CE2\u0CE3]") ; vowel sign ("H" . "\u0CCD") ; VIRAMA ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ @@ -343,11 +343,11 @@ kannada-composable-pattern (defconst malayalam-composable-pattern (let ((table - '(("A" . "[\u0D02-\u0D03]") ; SIGN ANUSVARA .. VISARGA - ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel + '(("A" . "[\u0D02\u0D03]") ; SIGN ANUSVARA .. VISARGA + ("V" . "[\u0D05-\u0D14\u0D60\u0D61]") ; independent vowel ("C" . "[\u0D15-\u0D39]") ; consonant - ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA - ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra + ("Y" . "[\u0D2F\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA + ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62\u0D63]") ; postbase matra ("H" . "\u0D4D") ; SIGN VIRAMA ("N" . "\u200C") ; ZWNJ ("J" . "\u200D") ; ZWJ diff --git a/lisp/language/tibetan.el b/lisp/language/tibetan.el index 4be25cecab..b42a1e8fb8 100644 --- a/lisp/language/tibetan.el +++ b/lisp/language/tibetan.el @@ -549,19 +549,16 @@ tibetan-precomposition-rule-alist ("སྨ" . ""))) (defconst tibetan-regexp - (let ((l (list tibetan-precomposed-transcription-alist - tibetan-consonant-transcription-alist - tibetan-vowel-transcription-alist - tibetan-modifier-transcription-alist - tibetan-subjoined-transcription-alist)) - (separator "\\|") - tail pattern) - (while l - (setq tail (car l) l (cdr l)) - (while tail - (setq pattern (cons separator (cons (car (car tail)) pattern)) - tail (cdr tail)))) - (apply 'concat (nreverse (cdr pattern)))) + (let (pattern) + (dolist (alist (list tibetan-precomposed-transcription-alist + tibetan-consonant-transcription-alist + tibetan-vowel-transcription-alist + tibetan-modifier-transcription-alist + tibetan-subjoined-transcription-alist) + (apply #'concat (nreverse (cdr pattern)))) + (dolist (key-val alist) + (setq pattern (cons "\\|" (cons (regexp-quote (car key-val)) + pattern)))))) "Regexp matching a Tibetan transcription of a composable Tibetan sequence. The result of matching is to be used for indexing alists at conversion from a roman transcription to the corresponding Tibetan character.") diff --git a/lisp/textmodes/rst.el b/lisp/textmodes/rst.el index 88c44c06da..b7438fbb10 100644 --- a/lisp/textmodes/rst.el +++ b/lisp/textmodes/rst.el @@ -388,8 +388,8 @@ rst-re-alist-def ; item tag. ;; Inline markup (`ilm') - (ilm-pfx (:alt "^" hws-prt "[-'\"([{<‘“«’/:]")) - (ilm-sfx (:alt "$" hws-prt "[]-'\")}>’”»/:.,;!?\\]")) + (ilm-pfx (:alt "^" hws-prt "['\"([{<‘“«’/:-]")) + (ilm-sfx (:alt "$" hws-prt "[]'\")}>’”»/:.,;!?\\-]")) ;; Inline markup content (`ilc') (ilcsgl-tag "\\S ") ; A single non-white character. @@ -431,7 +431,7 @@ rst-re-alist-def (fld-tag ":" fldnam-tag ":") ; A field marker. ;; Options (`opt') - (optsta-tag (:alt "[-+/]" "--")) ; Start of an option. + (optsta-tag (:alt "[+/-]" "--")) ; Start of an option. (optnam-tag "\\sw" (:alt "-" "\\sw") "*") ; Name of an option. (optarg-tag (:shy "[ =]\\S +")) ; Option argument. (optsep-tag (:shy "," hws-prt)) ; Separator between options. @@ -457,7 +457,7 @@ rst-re-alist-def ; tag. ;; Symbol (`sym') - (sym-prt "[-+.:_]") ; Non-word part of a symbol. + (sym-prt "[+.:_-]") ; Non-word part of a symbol. (sym-tag (:shy "\\sw+" (:shy sym-prt "\\sw+") "*")) ;; URIs (`uri') -- 2.21.0