unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Paul Eggert <eggert@cs.ucla.edu>
To: "Mattias Engdegård" <mattiase@acm.org>
Cc: Emacs developers <emacs-devel@gnu.org>
Subject: Re: Regexp scan of Emacs (April 19)
Date: Fri, 19 Apr 2019 09:04:55 -0700	[thread overview]
Message-ID: <031ec3d9-a8ad-e656-b0ac-465ec00a285d@cs.ucla.edu> (raw)
In-Reply-To: <90232AC2-3228-4C8F-AD84-FFB6A30F51AF@acm.org>

[-- Attachment #1: Type: text/plain, Size: 498 bytes --]

On 4/19/19 2:39 AM, Mattias Engdegård wrote:
> This is the latest scan of errors and oddities in regexps in the Emacs source tree.
> New this time is an experimental check for branch subsumption: whether one branch in an or-expression matches a superset of another, like "[ab]\\|a". Please tell me if you believe this might be useful, so that I know whether to include it in the next release of xr.

Thanks, these all look useful to me. I installed the attached patch to
fix the glitches.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Fix-regexp-branches-that-subsume-other-branches.patch --]
[-- Type: text/x-patch; name="0001-Fix-regexp-branches-that-subsume-other-branches.patch", Size: 6499 bytes --]

From 872ec904253e2399bcf772f7995c363ca0f8a262 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 19 Apr 2019 09:00:04 -0700
Subject: [PATCH] Fix regexp branches that subsume other branches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problems reported by Mattias Engdegård in:
https://lists.gnu.org/r/emacs-devel/2019-04/msg00803.html
* lisp/arc-mode.el (archive-rar-summarize):
* lisp/eshell/em-hist.el (eshell-hist-word-designator):
* lisp/info.el (Info-dir-remove-duplicates):
* lisp/international/ja-dic-cnv.el (skkdic-convert-postfix)
(skkdic-convert-prefix, skkdic-collect-okuri-nasi):
* lisp/progmodes/cc-awk.el (c-awk-esc-pair-re):
* lisp/xml.el (xml-att-type-re):
Omit regexp branches that subsume other branches.
* lisp/progmodes/cperl-mode.el (cperl-beautify-regexp-piece):
$ and ^ aren’t simple-codes.
---
 lisp/arc-mode.el                 |  2 +-
 lisp/eshell/em-hist.el           |  2 +-
 lisp/info.el                     |  2 +-
 lisp/international/ja-dic-cnv.el | 10 +++++-----
 lisp/progmodes/cc-awk.el         |  2 +-
 lisp/progmodes/cperl-mode.el     |  2 +-
 lisp/xml.el                      |  1 -
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/lisp/arc-mode.el b/lisp/arc-mode.el
index 6a58d61a54..1c88f9a1a1 100644
--- a/lisp/arc-mode.el
+++ b/lisp/arc-mode.el
@@ -2019,7 +2019,7 @@ archive-rar-summarize
       (re-search-forward "^\\(\s+=+\s*\\)+\n")
       (while (looking-at (concat "^\s+[0-9.]+\s+D?-+\s+"   ; Flags
                                  "\\([0-9-]+\\)\s+"        ; Size
-                                 "\\([-0-9.%]+\\|-+\\)\s+" ; Ratio
+                                 "\\([-0-9.%]+\\)\s+"      ; Ratio
                                  "\\([0-9a-zA-Z]+\\)\s+"   ; Mode
                                  "\\([0-9-]+\\)\s+"        ; Date
                                  "\\([0-9:]+\\)\s+"        ; Time
diff --git a/lisp/eshell/em-hist.el b/lisp/eshell/em-hist.el
index 614faaa131..adb028002b 100644
--- a/lisp/eshell/em-hist.el
+++ b/lisp/eshell/em-hist.el
@@ -153,7 +153,7 @@ eshell-hist-event-designator
   :group 'eshell-hist)
 
 (defcustom eshell-hist-word-designator
-  "^:?\\([0-9]+\\|[$^%*]\\)?\\(\\*\\|-[0-9]*\\|[$^%*]\\)?"
+  "^:?\\([0-9]+\\|[$^%*]\\)?\\(-[0-9]*\\|[$^%*]\\)?"
   "The regexp used to identify history word designators."
   :type 'regexp
   :group 'eshell-hist)
diff --git a/lisp/info.el b/lisp/info.el
index f3b413a2f9..2e5f433dc8 100644
--- a/lisp/info.el
+++ b/lisp/info.el
@@ -1531,7 +1531,7 @@ Info-dir-remove-duplicates
 	    (save-restriction
 	      (narrow-to-region start (point))
 	      (goto-char (point-min))
-	      (while (re-search-forward "^\\* \\([^:\n]+:\\(:\\|[^.\n]+\\).\\)" nil 'move)
+	      (while (re-search-forward "^\\* \\([^:\n]+:[^.\n]+.\\)" nil 'move)
 		;; Fold case straight away; `member-ignore-case' here wasteful.
 		(let ((x (downcase (match-string 1))))
 		  (if (member x seen)
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index 578cd63a59..e721083189 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -124,7 +124,7 @@ skkdic-convert-postfix
 	(setq l (cdr l)))))
 
   ;; Search postfix entries.
-  (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|ー\\)+\\) " nil t)
+  (while (re-search-forward "^[#<>?]\\(\\cH+\\) " nil t)
     (let ((kana (match-string-no-properties 1))
 	  str candidates)
       (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
@@ -157,7 +157,7 @@ skkdic-convert-prefix
     (insert ";; Setting prefix entries.\n"
 	    "(skkdic-set-prefix\n"))
   (save-excursion
-    (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\)[<>?] " nil t)
+    (while (re-search-forward "^\\(\\cH+\\)[<>?] " nil t)
       (let ((kana (match-string-no-properties 1))
 	    str candidates)
 	(while (looking-at "/\\([^/\n]+\\)/")
@@ -275,11 +275,11 @@ skkdic-collect-okuri-nasi
     (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
                                             (point) (point-max)
                                             nil 10)))
-      (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\) \\(/\\cj.*\\)/$"
+      (while (re-search-forward "^\\(\\cH+\\) \\(/\\cj.*\\)/$"
 				nil t)
         (let ((kana (match-string-no-properties 1))
-	      (candidates (skkdic-get-candidate-list (match-beginning 3)
-						     (match-end 3))))
+	      (candidates (skkdic-get-candidate-list (match-beginning 2)
+						     (match-end 2))))
 	  (setq skkdic-okuri-nasi-entries
 		(cons (cons kana candidates) skkdic-okuri-nasi-entries))
           (progress-reporter-update progress (point))
diff --git a/lisp/progmodes/cc-awk.el b/lisp/progmodes/cc-awk.el
index 70aa3c4b1f..1a67a95927 100644
--- a/lisp/progmodes/cc-awk.el
+++ b/lisp/progmodes/cc-awk.el
@@ -95,7 +95,7 @@ awk-mode-syntax-table
 ;; Emacs has in the past used \r to mark hidden lines in some fashion (and
 ;; maybe still does).
 
-(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
+(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\\'\\)")
 ;;   Matches any escaped (with \) character-pair, including an escaped newline.
 (defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)")
 ;;   Matches any escaped (with \) character-pair, apart from an escaped newline.
diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el
index 73b55e29a5..ba007d67c0 100644
--- a/lisp/progmodes/cperl-mode.el
+++ b/lisp/progmodes/cperl-mode.el
@@ -7983,7 +7983,7 @@ cperl-beautify-regexp-piece
 		       "\\|"		; $ ^
 		       "[$^]"
 		       "\\|"		; simple-code simple-code*?
-		       "\\(\\\\.\\|[^][()#|*+?\n]\\)\\([*+{?]\\??\\)?" ; 4 5
+		       "\\(\\\\.\\|[^][()#|*+?$^\n]\\)\\([*+{?]\\??\\)?" ; 4 5
 		       "\\|"		; Class
 		       "\\(\\[\\)"	; 6
 		       "\\|"		; Grouping
diff --git a/lisp/xml.el b/lisp/xml.el
index b5b923f863..1f3c05f4d9 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -245,7 +245,6 @@ xml-enumerated-type-re
 ;; [54] AttType    ::= StringType | TokenizedType | EnumeratedType
 ;; [55] StringType ::= 'CDATA'
 (defconst xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re
-				  "\\|" xml-notation-type-re
 				  "\\|" xml-enumerated-type-re "\\)"))
 
 ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
-- 
2.20.1


  parent reply	other threads:[~2019-04-19 16:04 UTC|newest]

Thread overview: 4+ messages / expand [flat|nested]  mbox.gz  Atom feed  top
2019-04-19  9:39 Regexp scan of Emacs (April 19) Mattias Engdegård
2019-04-19 12:42 ` Michael Albinus
2019-04-19 16:04 ` Paul Eggert [this message]
2019-04-19 20:29   ` Mattias Engdegård

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=031ec3d9-a8ad-e656-b0ac-465ec00a285d@cs.ucla.edu \
    --to=eggert@cs.ucla.edu \
    --cc=emacs-devel@gnu.org \
    --cc=mattiase@acm.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).