unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Alan Mackenzie <acm@muc.de>
To: Leo Liu <sdl.web@gmail.com>
Cc: 13541@debbugs.gnu.org
Subject: bug#13541: 24.2.92; awk-mode: wrong font locking regexp literals
Date: Sun, 27 Jan 2013 18:59:06 +0000	[thread overview]
Message-ID: <20130127185906.GA16161@acm.acm> (raw)
In-Reply-To: <m2y5fgtcuu.fsf@gmail.com>

Hi, Leo.

On Sat, Jan 26, 2013 at 07:14:49PM +0800, Leo Liu wrote:
> On 2013-01-26 01:50 +0800, Alan Mackenzie wrote:
> > Could you please try out, fairly thoroughly, the following patch, and let
> > me know how it goes.  It aims to fontify a /regexp/ wherever one might
> > occur.

> The second regexp is not font-locked in this case:

> /a/ { print /abc/ }

Yes, thanks for spotting this.  The situation was more complicated than I
thought.  I think this replacement patch fixes that case (together with a
few others).  Would you try it out again, please.



=== modified file 'lisp/progmodes/cc-awk.el'
*** lisp/progmodes/cc-awk.el	2013-01-01 09:11:05 +0000
--- lisp/progmodes/cc-awk.el	2013-01-27 18:23:59 +0000
***************
*** 127,148 ****
  ;; escaped EOL.
  
  ;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
- (defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
- ;;   Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
- ;; localization string in gawk 3.1
  (defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
  ;;   Matches an underline NOT followed by ".
  (defconst c-awk-harmless-string*-re
    (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
! ;;   Matches a (possibly empty) sequence of chars without unescaped /, ", \,
! ;; #, or newlines.
  (defconst c-awk-harmless-string*-here-re
    (concat "\\=" c-awk-harmless-string*-re))
! ;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
! ;; at point.
  (defconst c-awk-harmless-line-re
!   (concat c-awk-harmless-string*-re
!           "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
  ;;   Matches (the tail of) an AWK \"logical\" line not containing an unescaped
  ;; " or /.  "logical" means "possibly containing escaped newlines".  A comment
  ;; is matched as part of the line even if it contains a " or a /.  The End of
--- 127,155 ----
  ;; escaped EOL.
  
  ;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
  (defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
  ;;   Matches an underline NOT followed by ".
+ (defconst c-awk-harmless-char-re "[^_#/\"{}();\\\\\n\r]")
+ ;;   Matches any character not significant in the state machine applying
+ ;; syntax-table properties to "s and /s.
  (defconst c-awk-harmless-string*-re
    (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
! ;;   Matches a (possibly empty) sequence of characters insignificant in the
! ;; state machine applying syntax-table properties to "s and /s.
  (defconst c-awk-harmless-string*-here-re
    (concat "\\=" c-awk-harmless-string*-re))
! ;; Matches the (possibly empty) sequence of "insignificant" chars at point.
! 
! (defconst c-awk-harmless-line-char-re "[^_#/\"\\\\\n\r]")
! ;;   Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
! ;; localisation string in gawk 3.1
! (defconst c-awk-harmless-line-string*-re
!   (concat "\\(" c-awk-harmless-line-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
! ;;   Matches a (possibly empty) sequence of chars without unescaped /, ", \,
! ;; #, or newlines.
  (defconst c-awk-harmless-line-re
!   (concat c-awk-harmless-line-string*-re
! 	  "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
  ;;   Matches (the tail of) an AWK \"logical\" line not containing an unescaped
  ;; " or /.  "logical" means "possibly containing escaped newlines".  A comment
  ;; is matched as part of the line even if it contains a " or a /.  The End of
***************
*** 211,217 ****
  ;; division sign.
  (defconst c-awk-neutral-re
  ;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
!   "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
  ;;   A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
  ;; This is space/tab, braces, an auto-increment/decrement operator or an
  ;; escaped character.  Or one of the (invalid) characters @ or `.  But NOT an
--- 218,224 ----
  ;; division sign.
  (defconst c-awk-neutral-re
  ;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
!   "\\([}@` \t]\\|\\+\\+\\|--\\|\\\\\\(.\\|[\n\r]\\)\\)")
  ;;   A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
  ;; This is space/tab, braces, an auto-increment/decrement operator or an
  ;; escaped character.  Or one of the (invalid) characters @ or `.  But NOT an
***************
*** 231,238 ****
  ;; will only work when there won't be a preceding " or / before the sought /
  ;; to foul things up.
  (defconst c-awk-non-arith-op-bra-re
!   "[[\(&=:!><,?;'~|]")
! ;;   Matches an opening BRAcket, round or square, or any operator character
  ;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
  ;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
  ;; and "--".
--- 238,245 ----
  ;; will only work when there won't be a preceding " or / before the sought /
  ;; to foul things up.
  (defconst c-awk-non-arith-op-bra-re
!   "[[\({&=:!><,?;'~|]")
! ;;   Matches an opening BRAcket (of any sort), or any operator character
  ;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
  ;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
  ;; and "--".
***************
*** 242,247 ****
--- 249,264 ----
  ;; bracket, in a context where an immediate / would be a division sign.  This
  ;; will only work when there won't be a preceding " or / before the sought /
  ;; to foul things up.
+ (defconst c-awk-pre-exp-alphanum-kwd-re
+   (concat "\\(^\\|[^_\n\r]\\)\\<"
+ 	  (regexp-opt '("print" "return" "case") t)
+ 	  "\\>\\([^_\n\r]\\|$\\)"))
+ ;;   Matches all AWK keywords which can precede expressions (including
+ ;; /regexp/).
+ (defconst c-awk-kwd-regexp-sign-re
+   (concat c-awk-pre-exp-alphanum-kwd-re c-awk-neutrals*-re "/"))
+ ;;   Matches a piece of AWK buffer ending in <kwd> /, where <kwd> is a keyword
+ ;; which can precede an expression.
  
  ;; REGEXPS USED FOR FINDING THE POSITION OF A "virtual semicolon"
  (defconst c-awk-_-harmless-nonws-char-re "[^#/\"\\\\\n\r \t]")
***************
*** 721,729 ****
      (goto-char anchor)
      ;; Analyze the line to find out what the / is.
      (if (if anchor-state-/div
!             (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
!           (search-forward-regexp c-awk-div-sign-re (1+ /point) t))
!         ;; A division sign.
  	(progn (goto-char (1+ /point)) nil)
        ;; A regexp opener
        ;; Jump over the regexp innards, setting the match data.
--- 738,747 ----
      (goto-char anchor)
      ;; Analyze the line to find out what the / is.
      (if (if anchor-state-/div
! 	    (not (search-forward-regexp c-awk-regexp-sign-re (1+ /point) t))
! 	  (and (not (search-forward-regexp c-awk-kwd-regexp-sign-re (1+ /point) t))
! 	       (search-forward-regexp c-awk-div-sign-re (1+ /point) t)))
! 	;; A division sign.
  	(progn (goto-char (1+ /point)) nil)
        ;; A regexp opener
        ;; Jump over the regexp innards, setting the match data.
***************
*** 776,787 ****
               (< (point) lim))
        (setq anchor (point))
        (search-forward-regexp c-awk-harmless-string*-here-re nil t)
!       ;; We are now looking at either a " or a /.
!       ;; Do our thing on the string, regexp or division sign.
        (setq anchor-state-/div
!             (if (looking-at "_?\"")
!                 (c-awk-syntax-tablify-string)
!               (c-awk-syntax-tablify-/ anchor anchor-state-/div))))
      nil))
  
  ;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
--- 794,813 ----
               (< (point) lim))
        (setq anchor (point))
        (search-forward-regexp c-awk-harmless-string*-here-re nil t)
!       ;; We are now looking at either a " or a / or a brace/paren/semicolon.
!       ;; Do our thing on the string, regexp or division sign or update our state.
        (setq anchor-state-/div
! 	    (cond
! 	     ((looking-at "_?\"")
! 	      (c-awk-syntax-tablify-string))
! 	     ((eq (char-after) ?/)
! 	      (c-awk-syntax-tablify-/ anchor anchor-state-/div))
! 	     ((memq (char-after) '(?{ ?} ?\( ?\;))
! 	      (forward-char)
! 	      nil)
! 	     (t 			; ?\)
! 	      (forward-char)
! 	      t))))
      nil))
  
  ;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set


> Leo

-- 
Alan Mackenzie (Nuremberg, Germany).





  reply	other threads:[~2013-01-27 18:59 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-24 11:43 bug#13541: 24.2.92; awk-mode: wrong font locking regexp literals Leo Liu
2013-01-24 18:28 ` Glenn Morris
     [not found] ` <b5ehhajuzy.fsf@fencepost.gnu.org>
2013-01-24 22:16   ` Alan Mackenzie
2013-01-25  1:20     ` Leo Liu
2013-01-25  1:33       ` Glenn Morris
2013-01-25  1:44         ` Glenn Morris
2013-01-25 21:32           ` Richard Stallman
2013-01-25  8:44       ` bug#12274: 24.2; awk-mode indentation failure Alan Mackenzie
2013-01-25 12:58         ` Stefan Monnier
2013-01-25 17:33         ` Glenn Morris
2013-01-25 19:17         ` Alan Mackenzie
2013-01-25 17:50 ` bug#13541: 24.2.92; awk-mode: wrong font locking regexp literals Alan Mackenzie
2013-01-26 11:14   ` Leo Liu
2013-01-27 18:59     ` Alan Mackenzie [this message]
     [not found]     ` <20130127185906.GA16161__1271.15463042191$1359313643$gmane$org@acm.acm>
2013-01-28  1:12       ` Leo Liu
2013-01-28 11:14         ` Alan Mackenzie
     [not found]         ` <20130128111417.GA3330@acm.acm>
2013-01-28 12:11           ` Leo Liu
2013-01-29 20:58 ` Alan Mackenzie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130127185906.GA16161@acm.acm \
    --to=acm@muc.de \
    --cc=13541@debbugs.gnu.org \
    --cc=sdl.web@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).