Hi, Ivan. I've now got a patch, which I'd be grateful if you could try out, both to see if there are any bugs, and also to get your general impression. I think there are one or two bugs left in the code, and it needs tidying up quite a lot. So this won't be the final version. The patch will work only on the Savannah master branch - sorry, but it depends on the fix to the "infrastructure" bug which I committed to master only this morning (timezone +0200). I'm also attaching a small test file which might interest you. On Sat, May 28, 2016 at 02:40:45PM +0000, Alan Mackenzie wrote: > Thanks for the suggestion! I've actually had an almost working solution > myself for just over a week. Then I got confused with a bug in some CC > Mode "infrastructure" code. Such is life! > The way I am fontifying these is thus: > (i) For a correctly terminated raw string, everything between the ( and ) > inclusive gets string face, everything else just the default face: > R"foo(bar)foo" > ^^^^^ [carets mark the span `(bar)`] > font-lock-string-face. > (ii) For a construct with a raw string opener, not correctly terminated, > I am putting warning face on the entire raw string opener, leaving the > rest of the string with string face, e.g.: > R"baz(bar)foo" > ^^^^^^ [carets mark the opener `R"baz(`] > font-lock-warning-face > ^^^^^^^^ [carets mark the remainder `bar)foo"`] > font-lock-string-face > Of course, that is subject to change if it doesn't work very well. > CC Mode doesn't actually use syntax-ppss and syntax-propertize-function, > since they don't allow enough control. In particular, on a buffer > change, they erase all syntax-table text properties between point and end > of buffer which is wasteful; it is never necessary to erase these beyond > the next end of statement, and they are quite expensive to apply. > Anyhow, we should be able to have this implemented and the bug closed > pretty soon. 
The patch: diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index 4d6a120..460c773 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -2293,7 +2293,8 @@ c-state-pp-to-literal ;; (STATE TYPE (BEG . END)) if TO is in a literal; or ;; (STATE) otherwise, ;; where STATE is the parsing state at TO, TYPE is the type of the literal - ;; (one of 'c, 'c++, 'string) and (BEG . END) is the boundaries of the literal. + ;; (one of 'c, 'c++, 'string) and (BEG . END) is the boundaries of the literal, + ;; including the delimiters. ;; ;; Unless NOT-IN-DELIMITER is non-nil, when TO is inside a two-character ;; comment opener, this is recognized as being in a comment literal. @@ -5777,6 +5778,139 @@ c-restore-<>-properties 'c-decl-arg-start))))))) (or (c-forward-<>-arglist nil) (forward-char))))) + + +;; Routines to handle C++ raw strings. +(defun c-raw-string-pos () + ;; Get POINT's relationship to any containing raw string. + ;; If point isn't in a raw string, return nil. + ;; Otherwise, return the following list: + ;; + ;; (POS B\" B\( E\) E\") + ;; + ;; , where POS is the symbol `open-delim' if point is in the opening + ;; delimiter, the symbol `close-delim' if it's in the closing delimiter, and + ;; nil if it's in the string body. B\", B\(, E\), E\" are the positions of + ;; the opening and closing quotes and parentheses of a correctly terminated + ;; raw string. (N.B.: E\) and E\" are NOT on the "outside" of these + ;; characters.) If the raw string is not terminated, E\) and E\" are set to + ;; nil. + ;; + ;; Note: this routine is dependant upon the correct syntax-table text + ;; properties being set. 
+ (let* ((safe (c-state-semi-safe-place (point))) + (state (c-state-pp-to-literal safe (point))) + open-quote-pos open-paren-pos close-paren-pos close-quote-pos id) + (save-excursion + (when + (and + (cond + ((null (cadr state)) + (or (eq (char-after) ?\") + (search-backward "\"" (max (- (point) 17) (point-min)) t))) + ((and (eq (cadr state) 'string) + (goto-char (car (nth 2 state))) + (or (eq (char-after) ?\") + (search-backward "\"" (max (- (point) 17) (point-min)) t)) + (not (bobp))))) + (eq (char-before) ?R) + (looking-at "\"\\([^ ()\\\n\r\t]\\{,16\\}\\)(")) + (setq open-quote-pos (point) + open-paren-pos (match-end 1) + id (match-string-no-properties 1)) + (goto-char (1+ open-paren-pos)) + (when (and (not (c-get-char-property open-paren-pos 'syntax-table)) + (search-forward-regexp (concat ")" id "\"") nil t)) + (setq close-paren-pos (match-beginning 0) + close-quote-pos (1- (point)))))) + (and open-quote-pos + (list + (cond + ((<= (point) open-paren-pos) + 'open-delim) + ((and close-paren-pos + (> (point) close-paren-pos)) + 'close-delim) + (t nil)) + open-quote-pos open-paren-pos close-paren-pos close-quote-pos)))) + +(defun c-clear-raw-string-syntax-table-properties (raw) + (if (nth 2 raw) + ;; Clear out punctuation syntax-table text props from the string body. + (c-clear-char-property-with-value + (cadr raw) (nth 2 raw) 'syntax-table '(1)) + ;; unclosed raw string. + (c-clear-char-property (car raw) 'syntax-table) + (c-clear-char-property (cadr raw) 'syntax-table)) + (setq c-new-BEG (min c-new-BEG (car raw))) + (setq c-new-END (max c-new-END (1+ (cadr raw))))) + +(defun c-before-change-check-c++-raw-strings (beg end) + ;; This functions clears syntax-table text properties from C++ raw strings + ;; which are being chnaged, or are associated with a change. 
+ (c-save-buffer-state + ((beg-rs (save-excursion (goto-char c-new-BEG) (c-raw-string-pos))) + (end-rs (save-excursion (goto-char c-new-END) (c-raw-string-pos))) + ) + (cond + ;; Neither BEG nor END are in raw strings. + ((and (null beg-rs) (null end-rs))) + ;; BEG is in the opening delimiter or "body" of an unterminated string. + ((and beg-rs (null (nth 3 beg-rs))) + (c-clear-raw-string-syntax-table-properties (cdr beg-rs))) + ;; BEG and END are both in the body of the same raw string. + ((and (equal (cdr beg-rs) (cdr end-rs)) + (null (car beg-rs)) (null (car end-rs)))) + ;; BEG and END are in the same raw string, (at least) one of them in a + ;; delimiter. + ((equal (cdr beg-rs) (cdr end-rs)) + (c-clear-raw-string-syntax-table-properties (cdr beg-rs))) + ;; BEG is in some raw string, END isn't in it. + (beg-rs + (c-clear-raw-string-syntax-table-properties (cdr beg-rs)) + (when end-rs + (c-clear-raw-string-syntax-table-properties (cdr end-rs)))) + ;; BEG isn't in a raw string, END is. + (end-rs + (c-clear-raw-string-syntax-table-properties (cdr end-rs)))))) + +(defun c-temp-before-change (beg end) + (setq c-new-BEG beg + c-new-END end) + (c-before-change-check-c++-raw-strings beg end)) + +(defun c-after-change-mark-raw-strings (beg end old-len) + ;; Put any needed text properties on raw strings. This function is called + ;; as an after-change function. 
+ (save-excursion + (c-save-buffer-state () + (goto-char c-new-BEG) + (while (and (< (point) c-new-END) + (c-syntactic-re-search-forward + "R\"\\([^ ()\\\n\r\t]\\{,16\\}\\)(" c-new-END t)) + (let ((id (match-string-no-properties 1)) + (open-quote (1+ (match-beginning 0))) + (open-paren (match-end 1)) + ) + (if (search-forward-regexp (concat ")" id "\"") nil t) + (let ((end-string (match-beginning 0)) + (after-quote (match-end 0)) + ) + (goto-char open-paren) + (while (progn (skip-syntax-forward "^\"" end-string) + (< (point) end-string)) + (c-put-char-property (point) 'syntax-table '(1)) ; punctuation + (forward-char)) + (goto-char after-quote)) + (c-put-char-property open-quote 'syntax-table '(1)) ; punctuation + (c-put-char-property open-paren 'syntax-table '(15))))) ; generic string + + ))) + +(defun c-temp-after-change (beg end old-len) + (setq c-new-BEG beg + c-new-END end) + (c-after-change-mark-raw-strings beg end old-len)) ;; Handling of small scale constructs like types and names. diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index 4e83d6d..fd8065a 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -723,6 +723,10 @@ c-font-lock-invalid-string (concat ".\\(" c-string-limit-regexp "\\)") '((c-font-lock-invalid-string))) + ;; Fontify C++ raw strings. + ,@(when (c-major-mode-is 'c++-mode) + '(c-font-lock-c++-raw-strings)) + ;; Fontify keyword constants. ,@(when (c-lang-const c-constant-kwds) (let ((re (c-make-keywords-re nil (c-lang-const c-constant-kwds)))) @@ -1571,6 +1575,34 @@ c-font-lock-enclosing-decls (c-forward-syntactic-ws) (c-font-lock-declarators limit t in-typedef))))))) +(defun c-font-lock-c++-raw-strings (limit) + ;; Fontify C++ raw strings. + ;; + ;; This function will be called from font-lock for a region bounded by POINT + ;; and LIMIT, as though it were to identify a keyword for + ;; font-lock-keyword-face. It always returns NIL to inhibit this and + ;; prevent a repeat invocation. 
See elisp/lispref page "Search-based + ;; Fontification". + (while (search-forward-regexp + "R\\(\"\\)\\([^ ()\\\n\r\t]\\{,16\\}\\)(" limit t) + (when ;; (eq (c-get-char-property (1- (point)) 'face) + ;; 'font-lock-string-face) + (or (and (eobp) + (eq (c-get-char-property (1- (point)) 'face) + 'font-lock-warning-face)) + (eq (c-get-char-property (point) 'face) 'font-lock-string-face)) + (if (c-get-char-property (1- (point)) 'syntax-table) + (c-put-font-lock-face (match-beginning 0) (match-end 0) + 'font-lock-warning-face) + (c-put-font-lock-face (match-beginning 1) (match-end 2) + 'default) + (when (search-forward-regexp + (concat ")\\(" (match-string-no-properties 2) "\\)\"") + limit t) + (c-put-font-lock-face (match-beginning 1) (point) + 'default))))) + nil) + (c-lang-defconst c-simple-decl-matchers "Simple font lock matchers for types and declarations. These are used on level 2 only and so aren't combined with `c-complex-decl-matchers'." diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 6f4d1f1..8ba0c5c 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -474,9 +474,12 @@ c-populate-syntax-table ;; The value here may be a list of functions or a single function. 
t nil c++ '(c-extend-region-for-CPP + c-depropertize-region + c-before-change-check-c++-raw-strings c-before-change-check-<>-operators c-invalidate-macro-cache) (c objc) '(c-extend-region-for-CPP + c-depropertize-region c-invalidate-macro-cache) ;; java 'c-before-change-check-<>-operators awk 'c-awk-record-region-clear-NL) @@ -509,7 +512,8 @@ c-populate-syntax-table (c objc) '(c-extend-font-lock-region-for-macros c-neutralize-syntax-in-and-mark-CPP c-change-expand-fl-region) - c++ '(c-extend-font-lock-region-for-macros + c++ '(c-after-change-mark-raw-strings + c-extend-font-lock-region-for-macros c-neutralize-syntax-in-and-mark-CPP c-restore-<>-properties c-change-expand-fl-region) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index 9ab0480..53322cf 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -877,6 +877,16 @@ c-called-from-text-property-change-p (memq (cadr (backtrace-frame 3)) '(put-text-property remove-list-of-text-properties))) +(defun c-depropertize-region (beg end) + ;; Remove the punctuation syntax-table text property from the region + ;; (c-new-BEG c-new-END). + ;; + ;; This function is in the C/C++/ObjC values of + ;; `c-get-state-before-change-functions' and is called exclusively as a + ;; before change function. + (c-clear-char-property-with-value + c-new-BEG c-new-END 'syntax-table '(1))) + (defun c-extend-region-for-CPP (beg end) ;; Adjust `c-new-BEG', `c-new-END' respectively to the beginning and end of ;; any preprocessor construct they may be in. @@ -969,7 +979,7 @@ c-neutralize-syntax-in-and-mark-CPP ;; This function might make hidden buffer changes. (c-save-buffer-state (limits ) ;; Clear 'syntax-table properties "punctuation": - (c-clear-char-property-with-value c-new-BEG c-new-END 'syntax-table '(1)) + ;; (c-clear-char-property-with-value c-new-BEG c-new-END 'syntax-table '(1)) ;; CPP "comment" markers: (if (eval-when-compile (memq 'category-properties c-emacs-features));Emacs. 
-- Alan Mackenzie (Nuremberg, Germany).