unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed
From: Alan Mackenzie <acm@muc.de>
To: "Mattias Engdegård" <mattiase@acm.org>
Cc: Lars Ingebrigtsen <larsi@gnus.org>, 25706@debbugs.gnu.org
Subject: bug#25706: 26.0.50; Slow C file fontification
Date: Fri, 4 Dec 2020 21:04:50 +0000	[thread overview]
Message-ID: <X8qkcokfZGbaK5A2__21992.436777667$1607116920$gmane$org@ACM> (raw)
In-Reply-To: <BFB6C808-847B-458D-B077-D69B2AFA8007@acm.org>

Hello, Mattias.

On Thu, Dec 03, 2020 at 15:03:27 +0100, Mattias Engdegård wrote:
> 3 dec. 2020 kl. 11.48 skrev Alan Mackenzie <acm@muc.de>:

> > I've found it.  There was a "harmless" c-backward-syntactic-ws
> > invocation in c-determine-limit.  This Lisp macro moves back over
> > syntactic whitespace, which includes preprocessor macros.  So this was
> > going back all the way to BOB, from which we scanned forward again.

> Not bad. Now Emacs starts becoming usable for real code!  I can confirm
> a big subjective improvement on several big preprocessor-heavy files,
> and measurements agree.

I think you'll like my latest provisional patch!

I've tracked down and eliminated a ~0.5s delay when typing characters
near the end of a "monster" buffer.

> > It now takes me just under 6 minutes to (time-scroll) through the entire
> > buffer, compared with a previous hour.  As already mentioned, it is still
> > slightly more sluggish near the end of the buffer than near the start.

With the latest patch, it takes me 121s.

> Is that with or without my regexp patch?

Without.

> It looks like there may be more regexp improvements possible. We can
> take a closer look later on, when the running time is less dominated by
> other issues.

Maybe that time is now.  Please try the latest patch.  I think there are
still things needing optimisation in C++ Mode (make sure your monster
buffers are in C Mode, please).  But for now....



diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el
index 252eec138c..22e6ef5894 100644
--- a/lisp/progmodes/cc-engine.el
+++ b/lisp/progmodes/cc-engine.el
@@ -972,7 +972,7 @@ c-beginning-of-statement-1
       ;; that we've moved.
       (while (progn
 	       (setq pos (point))
-	       (c-backward-syntactic-ws)
+	       (c-backward-syntactic-ws lim)
 	       ;; Protect post-++/-- operators just before a virtual semicolon.
 	       (and (not (c-at-vsemi-p))
 		    (/= (skip-chars-backward "-+!*&~@`#") 0))))
@@ -984,7 +984,7 @@ c-beginning-of-statement-1
       (if (and (memq (char-before) delims)
 	       (progn (forward-char -1)
 		      (setq saved (point))
-		      (c-backward-syntactic-ws)
+		      (c-backward-syntactic-ws lim)
 		      (or (memq (char-before) delims)
 			  (memq (char-before) '(?: nil))
 			  (eq (char-syntax (char-before)) ?\()
@@ -1164,7 +1164,7 @@ c-beginning-of-statement-1
                 ;; HERE IS THE SINGLE PLACE INSIDE THE PDA LOOP WHERE WE MOVE
 		;; BACKWARDS THROUGH THE SOURCE.
 
-		(c-backward-syntactic-ws)
+		(c-backward-syntactic-ws lim)
 		(let ((before-sws-pos (point))
 		      ;; The end position of the area to search for statement
 		      ;; barriers in this round.
@@ -1188,7 +1188,7 @@ c-beginning-of-statement-1
 			 ((and (not macro-start)
 			       (c-beginning-of-macro))
 			  (save-excursion
-			    (c-backward-syntactic-ws)
+			    (c-backward-syntactic-ws lim)
 			    (setq before-sws-pos (point)))
 			  ;; Have we crossed a statement boundary?  If not,
 			  ;; keep going back until we find one or a "real" sexp.
@@ -1413,7 +1413,7 @@ c-beginning-of-statement-1
 
       ;; Skip over the unary operators that can start the statement.
       (while (progn
-	       (c-backward-syntactic-ws)
+	       (c-backward-syntactic-ws lim)
 	       ;; protect AWK post-inc/decrement operators, etc.
 	       (and (not (c-at-vsemi-p (point)))
 		    (/= (skip-chars-backward "-.+!*&~@`#") 0)))
@@ -3568,15 +3568,18 @@ c-get-fallback-scan-pos
   ;; Return a start position for building `c-state-cache' from
   ;; scratch.  This will be at the top level, 2 defuns back.
   (save-excursion
-    ;; Go back 2 bods, but ignore any bogus positions returned by
-    ;; beginning-of-defun (i.e. open paren in column zero).
-    (goto-char here)
-    (let ((cnt 2))
-      (while (not (or (bobp) (zerop cnt)))
-	(c-beginning-of-defun-1)	; Pure elisp BOD.
-	(if (eq (char-after) ?\{)
-	    (setq cnt (1- cnt)))))
-    (point)))
+    (save-restriction
+      (when (> here (* 10 c-state-cache-too-far))
+	(narrow-to-region (- here (* 10 c-state-cache-too-far)) here))
+      ;; Go back 2 bods, but ignore any bogus positions returned by
+      ;; beginning-of-defun (i.e. open paren in column zero).
+      (goto-char here)
+      (let ((cnt 2))
+	(while (not (or (bobp) (zerop cnt)))
+	  (c-beginning-of-defun-1)	; Pure elisp BOD.
+	  (if (eq (char-after) ?\{)
+	      (setq cnt (1- cnt)))))
+      (point))))
 
 (defun c-state-balance-parens-backwards (here- here+ top)
   ;; Return the position of the opening paren/brace/bracket before HERE- which
@@ -3667,9 +3670,7 @@ c-parse-state-get-strategy
 	    how-far 0))
      ((<= good-pos here)
       (setq strategy 'forward
-	    start-point (if changed-macro-start
-			    cache-pos
-			  (max good-pos cache-pos))
+	    start-point (max good-pos cache-pos)
 	    how-far (- here start-point)))
      ((< (- good-pos here) (- here cache-pos)) ; FIXME!!! ; apply some sort of weighting.
       (setq strategy 'backward
@@ -4337,8 +4338,12 @@ c-invalidate-state-cache-1
       (if (and dropped-cons
 	       (<= too-high-pa here))
 	  (c-append-lower-brace-pair-to-state-cache too-high-pa here here-bol))
-      (setq c-state-cache-good-pos (or (c-state-cache-after-top-paren)
-				       (c-state-get-min-scan-pos)))))
+      (if (and c-state-cache-good-pos (< here c-state-cache-good-pos))
+	  (setq c-state-cache-good-pos
+		(or (save-excursion
+		      (goto-char here)
+		      (c-literal-start))
+		    here)))))
 
   ;; The brace-pair desert marker:
   (when (car c-state-brace-pair-desert)
@@ -5402,8 +5407,11 @@ c-syntactic-skip-backward
 	       ;; Optimize for, in particular, large blocks of comments from
 	       ;; `comment-region'.
 	       (progn (when opt-ws
-			(c-backward-syntactic-ws)
-			(setq paren-level-pos (point)))
+			(let ((opt-pos (point)))
+			  (c-backward-syntactic-ws limit)
+			  (if (> (point) limit)
+			      (setq paren-level-pos (point))
+			    (goto-char opt-pos))))
 		      t)
 	       ;; Move back to a candidate end point which isn't in a literal
 	       ;; or in a macro we didn't start in.
@@ -5423,7 +5431,10 @@ c-syntactic-skip-backward
 				     (setq macro-start (point))))
 			    (goto-char macro-start))))
 		   (when opt-ws
-		     (c-backward-syntactic-ws)))
+		     (let ((opt-pos (point)))
+		       (c-backward-syntactic-ws limit)
+		       (if (<= (point) limit)
+			   (goto-char opt-pos)))))
 		 (< (point) pos))
 
 	       ;; Check whether we're at the wrong level of nesting (when
@@ -5766,8 +5777,6 @@ c-determine-limit-get-base
   ;; Get a "safe place" approximately TRY-SIZE characters before START.
   ;; This defsubst doesn't preserve point.
   (goto-char start)
-  (c-backward-syntactic-ws)
-  (setq start (point))
   (let* ((pos (max (- start try-size) (point-min)))
 	 (s (c-semi-pp-to-literal pos))
 	 (cand (or (car (cddr s)) pos)))
@@ -6248,8 +6257,13 @@ c-find-decl-prefix-search
        ;; preceding syntactic ws to set `cfd-match-pos' and to catch
        ;; any decl spots in the syntactic ws.
        (unless cfd-re-match
-	 (c-backward-syntactic-ws)
-	 (setq cfd-re-match (point))))
+	 (let ((cfd-cbsw-lim (- (point) 1000)))
+	   (c-backward-syntactic-ws cfd-cbsw-lim)
+	   (setq cfd-re-match
+		 (if (> (point) cfd-cbsw-lim)
+		     (point)
+		   0)))		   ; Set BOB case if the token's too far back.
+	 ))
 
      ;; Choose whichever match is closer to the start.
      (if (< cfd-re-match cfd-prop-match)
@@ -6482,7 +6496,10 @@ c-find-decl-spots
 	(c-invalidate-find-decl-cache cfd-start-pos)
 
 	(setq syntactic-pos (point))
-	(unless (eq syntactic-pos c-find-decl-syntactic-pos)
+	(unless
+	    (or (eq syntactic-pos c-find-decl-syntactic-pos)
+		(null c-find-decl-syntactic-pos)
+		(< c-find-decl-syntactic-pos (- (point) 10000)))
 	  ;; Don't have to do this if the cache is relevant here,
 	  ;; typically if the same line is refontified again.  If
 	  ;; we're just some syntactic whitespace further down we can
diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el
index bb7e5bea6e..07dcefb8d1 100644
--- a/lisp/progmodes/cc-fonts.el
+++ b/lisp/progmodes/cc-fonts.el
@@ -947,7 +947,7 @@ c-font-lock-complex-decl-prepare
     ;; closest token before the region.
     (save-excursion
       (let ((pos (point)))
-	(c-backward-syntactic-ws)
+	(c-backward-syntactic-ws (max (- (point) 500) (point-min)))
 	(c-clear-char-properties
 	 (if (and (not (bobp))
 		  (memq (c-get-char-property (1- (point)) 'c-type)
@@ -969,7 +969,7 @@ c-font-lock-complex-decl-prepare
     ;; The declared identifiers are font-locked correctly as types, if
     ;; that is what they are.
     (let ((prop (save-excursion
-		  (c-backward-syntactic-ws)
+		  (c-backward-syntactic-ws (max (- (point) 500) (point-min)))
 		  (unless (bobp)
 		    (c-get-char-property (1- (point)) 'c-type)))))
       (when (memq prop '(c-decl-id-start c-decl-type-start))
@@ -1496,7 +1496,8 @@ c-font-lock-declarations
 
 		 ;; Check we haven't missed a preceding "typedef".
 		 (when (not (looking-at c-typedef-key))
-		   (c-backward-syntactic-ws)
+		   (c-backward-syntactic-ws
+		    (max (- (point) 1000) (point-min)))
 		   (c-backward-token-2)
 		   (or (looking-at c-typedef-key)
 		       (goto-char start-pos)))
@@ -1536,8 +1537,10 @@ c-font-lock-declarations
 				     (c-backward-token-2)
 				     (and
 				      (not (looking-at c-opt-<>-sexp-key))
-				      (progn (c-backward-syntactic-ws)
-					     (memq (char-before) '(?\( ?,)))
+				      (progn
+					(c-backward-syntactic-ws
+					 (max (- (point) 1000) (point-min)))
+					(memq (char-before) '(?\( ?,)))
 				      (not (eq (c-get-char-property (1- (point))
 								    'c-type)
 					       'c-decl-arg-start))))))
@@ -2295,7 +2298,8 @@ c-font-lock-c++-using
 		  (and c-colon-type-list-re
 		       (c-go-up-list-backward)
 		       (eq (char-after) ?{)
-		       (eq (car (c-beginning-of-decl-1)) 'same)
+		       (eq (car (c-beginning-of-decl-1
+				 (c-determine-limit 1000))) 'same)
 		       (looking-at c-colon-type-list-re)))
 		;; Inherited protected member: leave unfontified
 		)


-- 
Alan Mackenzie (Nuremberg, Germany).





  reply	other threads:[~2020-12-04 21:04 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-13 18:20 bug#25706: 26.0.50; Slow C file fontification Sujith
2020-11-30 11:26 ` Lars Ingebrigtsen
2020-11-30 11:37   ` Lars Ingebrigtsen
2020-11-30 12:46 ` Mattias Engdegård
2020-11-30 12:49   ` Lars Ingebrigtsen
2020-11-30 16:27   ` Eli Zaretskii
2020-11-30 16:38   ` Alan Mackenzie
2020-11-30 16:53     ` Mattias Engdegård
2020-11-30 17:04       ` Mattias Engdegård
2020-12-01  5:48         ` Ravine Var
2020-12-01 13:34           ` Mattias Engdegård
2020-12-01  9:29         ` Alan Mackenzie
2020-12-01  9:44           ` martin rudalics
2020-12-01 10:07             ` Alan Mackenzie
2020-12-01  9:21       ` Alan Mackenzie
2020-12-01 12:03         ` Mattias Engdegård
2020-12-01 12:57           ` Alan Mackenzie
2020-12-01 14:07             ` Mattias Engdegård
2020-12-01 15:27               ` Alan Mackenzie
2020-12-01 18:59                 ` Mattias Engdegård
2020-12-02 10:15                   ` Alan Mackenzie
     [not found]                   ` <X8dpQeGaDD1w3kXX@ACM>
2020-12-02 15:06                     ` Mattias Engdegård
2020-12-03 10:48                       ` Alan Mackenzie
2020-12-03 14:03                         ` Mattias Engdegård
2020-12-04 21:04                           ` Alan Mackenzie [this message]
     [not found]                           ` <X8qkcokfZGbaK5A2@ACM>
2020-12-05 15:20                             ` Mattias Engdegård
2020-12-08 18:42                               ` Alan Mackenzie
     [not found]                               ` <X8/JG7eD7SfkEimH@ACM>
2020-12-08 19:32                                 ` Mattias Engdegård
2020-12-09  7:31                                 ` Ravine Var
2020-12-09  7:47                                   ` Ravine Var
2020-12-10  8:08                                     ` Alan Mackenzie
2020-12-09 18:46                                   ` Alan Mackenzie
     [not found]                                   ` <X9Ebn7hKnG/vpDcZ@ACM>
2020-12-09 20:04                                     ` Eli Zaretskii
2020-12-09 20:32                                       ` Alan Mackenzie
2020-12-10 17:02                                     ` Ravine Var
2020-12-10 20:02                                       ` Alan Mackenzie
2020-12-11 10:55                                         ` Ravine Var
2020-12-12 15:34                                           ` Alan Mackenzie
     [not found]                                           ` <X9TjCeydJaE2mpK8@ACM>
2020-12-14  7:20                                             ` Ravine Var
2020-12-14 11:44                                               ` Alan Mackenzie
2020-12-15  4:01                                                 ` Ravine Var
2020-12-15 12:27                                                   ` Alan Mackenzie
2020-12-09 17:00                                 ` Mattias Engdegård
2020-12-10 12:26                                   ` Alan Mackenzie
2020-11-30 18:30   ` Alan Mackenzie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='X8qkcokfZGbaK5A2__21992.436777667$1607116920$gmane$org@ACM' \
    --to=acm@muc.de \
    --cc=25706@debbugs.gnu.org \
    --cc=larsi@gnus.org \
    --cc=mattiase@acm.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).