all messages for Emacs-related lists mirrored at yhetil.org
 help / color / mirror / code / Atom feed
From: Stefan Monnier <monnier@iro.umontreal.ca>
To: emacs-devel@gnu.org
Subject: Loading dynamic docstrings and bytecode more lazily
Date: Sat, 09 Feb 2013 00:13:41 -0500	[thread overview]
Message-ID: <jwvwqui3wsz.fsf-monnier+emacs@gnu.org> (raw)

Based on discussions with T.V.Raman, I tried to change the current code
that handles dynamic docstrings (and byte-compile-dynamic), so as to get
closer to the original behavior of the `dynlib' package, where those
dynamic objects were moved into a separate file.

In the patch below, those dynamic objects (which currently are sprinkled
all over the .elc file within "comments") aren't moved out of the .elc
file, but they're moved to the end of the file and the `load' code is
changed so as to skip over them without even reading them, so the I/O
performance is improved by not fetching those bytes from the hard disk
at all.

E.g. with this patch, loading cl-macs.elc only reads the first 18KB of
the 74KB file.

Sadly, the result is disappointing.  When loading cl-macs.elc (i.e. one
of the files where the change should shine), I see a speedup of 4%.
Admittedly, this is with a warm cache (basically, I read cl-macs.elc
many times in a loop), whereas the main point of this optimization is to
reduce I/O, so it should be more useful in the cold cache situation.
So maybe, the improvement is more significant in other circumstances.

So, my current decision is to keep this patch in my "treasure chest"
because it's simply not worth the trouble.  But if someone can make
a good case for it, I could change my mind.


        Stefan


Using changes with id "22".
Message: Move dynamic-docstrings to eof
 M  lisp/emacs-lisp/bytecomp.el
 M  src/doc.c
 M  src/lread.c
=== modified file 'lisp/emacs-lisp/bytecomp.el'
--- a/lisp/emacs-lisp/bytecomp.el	2013-02-09 04:52:53 +0000
+++ b/lisp/emacs-lisp/bytecomp.el	2013-02-09 04:53:01 +0000
@@ -1474,6 +1474,7 @@
 ;; Dynamically bound in byte-compile-from-buffer.
 ;; NB also used in cl.el and cl-macs.el.
 (defvar byte-compile--outbuffer)
+(defvar byte-compile--backpatch)
 
 (defmacro byte-compile-close-variables (&rest body)
   (declare (debug t))
@@ -1885,6 +1886,7 @@
          (setq byte-compile--outbuffer
                (get-buffer-create " *Compiler Output*"))
        (set-buffer-multibyte t)
+       (setq-local byte-compile--backpatch nil)
        (erase-buffer)
        ;;	 (emacs-lisp-mode)
        (setq case-fold-search nil))
@@ -1927,43 +1929,86 @@
       (and byte-compile-current-file
 	   (with-current-buffer byte-compile--outbuffer
 	     (byte-compile-fix-header byte-compile-current-file))))
+     (byte-compile--backpatch byte-compile--outbuffer)
+     ;; Return compiled result.
      byte-compile--outbuffer)))
 
+(defun byte-compile--backpatch (outbuf)
+  ;; Back patch forward references to dynamic docstrings.
+  (with-current-buffer outbuf
+    ;; FIXME:compat: (cl-assert (looking-at "#@\\|\\'"))
+    (let* ((base (- (position-bytes (point)) (position-bytes (point-min))))
+           (cutoff base)
+           (skipped (- (position-bytes (point-max))
+                       (position-bytes (point))))
+           (tail (+ skipped 5))) ;; FIXME:compat: (length (number-to-string (1+ skipped)))
+      (while
+          (let ((newcutoff base)
+                (end (+ cutoff tail)))
+            ;; (message "Compute cutoff from base=%s cutoff=%s skipped=%s tail=%s"
+            ;;          base cutoff skipped tail)
+            (dolist (bp byte-compile--backpatch)
+              (let* ((off (cdr bp))
+                     (ref (if (> off 0) (- end off) (- (+ end off)))))
+                (cl-incf newcutoff (1- (length (number-to-string ref))))))
+            (prog1 (> newcutoff cutoff)
+              (setq cutoff newcutoff))))
+      (save-excursion
+        (let ((end (+ cutoff tail)))
+          (dolist (bp byte-compile--backpatch)
+            (goto-char (car bp))
+            (cl-assert (looking-at "0)"))
+            (delete-char 1)
+            (let* ((off (cdr bp))
+                   (ref (if (> off 0) (- end off) (- (+ end off)))))
+              (insert (number-to-string ref))))))
+      (cl-assert (eq cutoff (- (position-bytes (point))
+			       (position-bytes (point-min)))))
+      (save-excursion
+        (insert "#@00\037") ;; FIXME:compat: (number-to-string (1+ skipped)) " "
+        ;; When the reader sees a "#" it decides "there's something here",
+        ;; so we need to put something real after the #@nnn.
+        ;; FIXME:compat: (goto-char (point-max)) (insert "nil")
+        )
+      (kill-local-variable 'byte-compile--backpatch))))
+
+
 (defun byte-compile-fix-header (_filename)
   "If the current buffer has any multibyte characters, insert a version test."
-  (when (< (point-max) (position-bytes (point-max)))
-    (goto-char (point-min))
-    ;; Find the comment that describes the version condition.
-    (search-forward "\n;;; This file uses")
-    (narrow-to-region (line-beginning-position) (point-max))
-    ;; Find the first line of ballast semicolons.
-    (search-forward ";;;;;;;;;;")
-    (beginning-of-line)
-    (narrow-to-region (point-min) (point))
-    (let ((old-header-end (point))
-	  (minimum-version "23")
-	  delta)
-      (delete-region (point-min) (point-max))
-      (insert
-       ";;; This file contains utf-8 non-ASCII characters,\n"
-       ";;; and so cannot be loaded into Emacs 22 or earlier.\n"
-       ;; Have to check if emacs-version is bound so that this works
-       ;; in files loaded early in loadup.el.
-       "(and (boundp 'emacs-version)\n"
-       ;; If there is a name at the end of emacs-version,
-       ;; don't try to check the version number.
-       "     (< (aref emacs-version (1- (length emacs-version))) ?A)\n"
-       (format "     (string-lessp emacs-version \"%s\")\n" minimum-version)
-       ;; Because the header must fit in a fixed width, we cannot
-       ;; insert arbitrary-length file names (Bug#11585).
-       "     (error \"`%s' was compiled for "
-       (format "Emacs %s or later\" #$))\n\n" minimum-version))
-      ;; Now compensate for any change in size, to make sure all
-      ;; positions in the file remain valid.
-      (setq delta (- (point-max) old-header-end))
-      (goto-char (point-max))
-      (widen)
-      (delete-char delta))))
+  (save-excursion
+    (when (< (point-max) (position-bytes (point-max)))
+      (goto-char (point-min))
+      ;; Find the comment that describes the version condition.
+      (search-forward "\n;;; This file uses")
+      (narrow-to-region (line-beginning-position) (point-max))
+      ;; Find the first line of ballast semicolons.
+      (search-forward ";;;;;;;;;;")
+      (beginning-of-line)
+      (narrow-to-region (point-min) (point))
+      (let ((old-header-end (point))
+            (minimum-version "23")
+            delta)
+        (delete-region (point-min) (point-max))
+        (insert
+         ";;; This file contains utf-8 non-ASCII characters,\n"
+         ";;; and so cannot be loaded into Emacs 22 or earlier.\n"
+         ;; Have to check if emacs-version is bound so that this works
+         ;; in files loaded early in loadup.el.
+         "(and (boundp 'emacs-version)\n"
+         ;; If there is a name at the end of emacs-version,
+         ;; don't try to check the version number.
+         "     (< (aref emacs-version (1- (length emacs-version))) ?A)\n"
+         (format "     (string-lessp emacs-version \"%s\")\n" minimum-version)
+         ;; Because the header must fit in a fixed width, we cannot
+         ;; insert arbitrary-length file names (Bug#11585).
+         "     (error \"`%s' was compiled for "
+         (format "Emacs %s or later\" #$))\n\n" minimum-version))
+        ;; Now compensate for any change in size, to make sure all
+        ;; positions in the file remain valid.
+        (setq delta (- (point-max) old-header-end))
+        (goto-char (point-max))
+        (widen)
+        (delete-char delta)))))
 
 (defun byte-compile-insert-header (filename outbuffer)
   "Insert a header at the start of OUTBUFFER.
@@ -2055,7 +2100,7 @@
   ;; in the input buffer (now current), not in the output buffer.
   (let ((dynamic-docstrings byte-compile-dynamic-docstrings))
     (with-current-buffer byte-compile--outbuffer
-      (let (position)
+      (let (offset)
 
         ;; Insert the doc string, and make it a comment with #@LENGTH.
         (and (>= (nth 1 info) 0)
@@ -2064,15 +2109,15 @@
                ;; Make the doc string start at beginning of line
                ;; for make-docfile's sake.
                (insert "\n")
-               (setq position
+               (setq offset
                      (byte-compile-output-as-comment
                       (nth (nth 1 info) form) nil))
                ;; If the doc string starts with * (a user variable),
-               ;; negate POSITION.
+               ;; negate OFFSET.
                (if (and (stringp (nth (nth 1 info) form))
                         (> (length (nth (nth 1 info) form)) 0)
                         (eq (aref (nth (nth 1 info) form) 0) ?*))
-                   (setq position (- position)))))
+                   (setq offset (- offset)))))
 
         (if preface
             (progn
@@ -2107,19 +2152,21 @@
                           (not non-nil)))
                    ;; Output the byte code and constants specially
                    ;; for lazy dynamic loading.
-                   (let ((position
+                   (let ((offset
                           (byte-compile-output-as-comment
                            (cons (car form) (nth 1 form))
                            t)))
-                     (princ (format "(#$ . %d) nil" position)
-                            byte-compile--outbuffer)
+                     (princ "(#$ . 0) nil" byte-compile--outbuffer)
+                     (push (cons (- (point) 6) offset) byte-compile--backpatch)
                      (setq form (cdr form))
                      (setq index (1+ index))))
                   ((= index (nth 1 info))
-                   (if position
-                       (princ (format (if quoted "'(#$ . %d)"  "(#$ . %d)")
-                                      position)
-                              byte-compile--outbuffer)
+                   (if offset
+                       (progn
+                         (princ (if quoted "'(#$ . 0)"  "(#$ . 0)")
+                                byte-compile--outbuffer)
+                         (push (cons (- (point) 2) offset)
+                               byte-compile--backpatch))
                      (let ((print-escape-newlines nil))
                        (goto-char (prog1 (1+ (point))
                                     (prin1 (car form)
@@ -2436,34 +2483,34 @@
     (let ((position (point)))
 
       ;; Insert EXP, and make it a comment with #@LENGTH.
-      (insert " ")
+      ;; FIXME:compat: (insert " ")
       (if quoted
           (prin1 exp byte-compile--outbuffer)
         (princ exp byte-compile--outbuffer))
-      (goto-char position)
-      ;; Quote certain special characters as needed.
-      ;; get_doc_string in doc.c does the unquoting.
-      (while (search-forward "\^A" nil t)
-        (replace-match "\^A\^A" t t))
-      (goto-char position)
-      (while (search-forward "\000" nil t)
-        (replace-match "\^A0" t t))
-      (goto-char position)
-      (while (search-forward "\037" nil t)
-        (replace-match "\^A_" t t))
-      (goto-char (point-max))
-      (insert "\037")
-      (goto-char position)
-      (insert "#@" (format "%d" (- (position-bytes (point-max))
-                                   (position-bytes position))))
+      (let ((end (copy-marker (point) t)))
+        (goto-char position)
+        ;; Quote certain special characters as needed.
+        ;; get_doc_string in doc.c does the unquoting.
+        (while (search-forward "\^A" end t)
+          (replace-match "\^A\^A" t t))
+        (goto-char position)
+        (while (search-forward "\000" end t)
+          (replace-match "\^A0" t t))
+        (goto-char position)
+        (while (search-forward "\037" end t)
+          (replace-match "\^A_" t t))
+        (goto-char end)
+        (insert "\037")
+        (goto-char position)
+        ;; FIXME:compat: (insert "#@1")
+        )
 
-      ;; Save the file position of the object.
-      ;; Note we add 1 to skip the space that we inserted before the actual doc
-      ;; string, and subtract point-min to convert from an 1-origin Emacs
-      ;; position to a file position.
+      ;; Save the file offset (from the end) of the object.
+      ;; We add 1 to skip the space that we inserted before the actual doc
+      ;; string.
       (prog1
-          (- (position-bytes (point)) (point-min) -1)
-        (goto-char (point-max))))))
+          (- (position-bytes (point-max)) (position-bytes position)) ;; FIXME:compat: (1+ (point))
+        (goto-char position)))))
 
 (defun byte-compile--reify-function (fun)
   "Return an expression which will evaluate to a function value FUN.

=== modified file 'src/doc.c'
--- a/src/doc.c	2013-02-09 04:52:53 +0000
+++ b/src/doc.c	2013-02-09 04:53:01 +0000
@@ -215,14 +215,17 @@
   if (CONSP (filepos))
     {
       int test = 1;
-      if (get_doc_string_buffer[offset - test++] != ' ')
-	return Qnil;
-      while (get_doc_string_buffer[offset - test] >= '0'
-	     && get_doc_string_buffer[offset - test] <= '9')
-	test++;
-      if (get_doc_string_buffer[offset - test++] != '@'
-	  || get_doc_string_buffer[offset - test] != '#')
-	return Qnil;
+      if (get_doc_string_buffer[offset - test] != '\037')
+	{
+	  if (get_doc_string_buffer[offset - test++] != ' ')
+	    return Qnil;
+	  while (get_doc_string_buffer[offset - test] >= '0'
+		 && get_doc_string_buffer[offset - test] <= '9')
+	    test++;
+	  if (get_doc_string_buffer[offset - test++] != '@'
+	      || get_doc_string_buffer[offset - test] != '#')
+	    return Qnil;
+	}
     }
   else
     {

=== modified file 'src/lread.c'
--- a/src/lread.c	2013-02-09 04:52:53 +0000
+++ b/src/lread.c	2013-02-09 04:53:01 +0000
@@ -348,11 +348,14 @@
   return STRING_CHAR (buf);
 }
 
+#define FROM_FILE_P(readcharfun)			\
+  (EQ (readcharfun, Qget_file_char)			\
+   || EQ (readcharfun, Qget_emacs_mule_file_char))
+
 static void
 skip_dyn_bytes (Lisp_Object readcharfun, ptrdiff_t n)
 {
-  if (EQ (readcharfun, Qget_file_char)
-      || EQ (readcharfun, Qget_emacs_mule_file_char))
+  if (FROM_FILE_P (readcharfun))
     {
       block_input ();		/* FIXME: Not sure if it's needed.  */
       fseek (instream, n, SEEK_CUR);
@@ -372,6 +375,19 @@
     }
 }
 
+static void
+skip_dyn_eof (Lisp_Object readcharfun)
+{
+  if (FROM_FILE_P (readcharfun))
+    {
+      block_input ();		/* FIXME: Not sure if it's needed.  */
+      fseek (instream, 0, SEEK_END);
+      unblock_input ();
+    }
+  else
+    while (READCHAR >= 0);
+}
+
 /* Unread the character C in the way appropriate for the stream READCHARFUN.
    If the stream is a user function, call it with the char as argument.  */
 
@@ -423,8 +439,7 @@
     {
       unread_char = c;
     }
-  else if (EQ (readcharfun, Qget_file_char)
-	   || EQ (readcharfun, Qget_emacs_mule_file_char))
+  else if (FROM_FILE_P (readcharfun))
     {
       unread_char = c;
     }
@@ -2615,7 +2630,7 @@
       if (c == '@')
 	{
 	  enum { extra = 100 };
-	  ptrdiff_t i, nskip = 0;
+	  ptrdiff_t i, nskip = 0, digits = 0;
 
 	  /* Read a decimal integer.  */
 	  while ((c = READCHAR) >= 0
@@ -2623,8 +2638,14 @@
 	    {
 	      if ((STRING_BYTES_BOUND - extra) / 10 <= nskip)
 		string_overflow ();
+	      digits++;
 	      nskip *= 10;
 	      nskip += c - '0';
+	      if (digits == 2 && nskip == 0)
+		{ /* We've just seen #@00, which means "skip to end".  */
+		  skip_dyn_eof (readcharfun);
+		  return Qnil;
+		}
 	    }
 	  if (nskip > 0)
 	    /* We can't use UNREAD here, because in the code below we side-step
@@ -2636,8 +2657,7 @@
 	    UNREAD (c);
 	    
 	  if (load_force_doc_strings
-	      && (EQ (readcharfun, Qget_file_char)
-		  || EQ (readcharfun, Qget_emacs_mule_file_char)))
+	      && (FROM_FILE_P (readcharfun)))
 	    {
 	      /* If we are supposed to force doc strings into core right now,
 		 record the last string that we skipped,
@@ -3573,8 +3593,10 @@
 		{
 		  if (doc_reference == 1)
 		    return make_number (0);
-		  if (doc_reference == 2)
+		  if (doc_reference == 2 && INTEGERP (XCDR (val)))
 		    {
+		      char *saved = NULL;
+		      file_offset saved_position;
 		      /* Get a doc string from the file we are loading.
 			 If it's in saved_doc_string, get it from there.
 
@@ -3591,65 +3613,42 @@
 			  && pos < (saved_doc_string_position
 				    + saved_doc_string_length))
 			{
-			  ptrdiff_t start = pos - saved_doc_string_position;
-			  ptrdiff_t from, to;
-
-			  /* Process quoting with ^A,
-			     and find the end of the string,
-			     which is marked with ^_ (037).  */
-			  for (from = start, to = start;
-			       saved_doc_string[from] != 037;)
-			    {
-			      int c = saved_doc_string[from++];
-			      if (c == 1)
-				{
-				  c = saved_doc_string[from++];
-				  if (c == 1)
-				    saved_doc_string[to++] = c;
-				  else if (c == '0')
-				    saved_doc_string[to++] = 0;
-				  else if (c == '_')
-				    saved_doc_string[to++] = 037;
-				}
-			      else
-				saved_doc_string[to++] = c;
-			    }
-
-			  return make_unibyte_string (saved_doc_string + start,
-						      to - start);
+			  saved = saved_doc_string;
+			  saved_position = saved_doc_string_position;
 			}
 		      /* Look in prev_saved_doc_string the same way.  */
 		      else if (pos >= prev_saved_doc_string_position
 			       && pos < (prev_saved_doc_string_position
 					 + prev_saved_doc_string_length))
 			{
-			  ptrdiff_t start =
-			    pos - prev_saved_doc_string_position;
+			  saved = prev_saved_doc_string;
+			  saved_position = prev_saved_doc_string_position;
+			}
+		      if (saved)
+			{
+			  ptrdiff_t start = pos - saved_position;
 			  ptrdiff_t from, to;
 
 			  /* Process quoting with ^A,
 			     and find the end of the string,
 			     which is marked with ^_ (037).  */
 			  for (from = start, to = start;
-			       prev_saved_doc_string[from] != 037;)
+			       saved[from] != 037;)
 			    {
-			      int c = prev_saved_doc_string[from++];
+			      int c = saved[from++];
 			      if (c == 1)
 				{
-				  c = prev_saved_doc_string[from++];
-				  if (c == 1)
-				    prev_saved_doc_string[to++] = c;
-				  else if (c == '0')
-				    prev_saved_doc_string[to++] = 0;
-				  else if (c == '_')
-				    prev_saved_doc_string[to++] = 037;
+				  c = saved[from++];
+				  saved[to++] = (c == 1 ? c
+						 : c == '0' ? 0
+						 : c == '_' ? 037
+						 : c);
 				}
 			      else
-				prev_saved_doc_string[to++] = c;
+				saved[to++] = c;
 			    }
 
-			  return make_unibyte_string (prev_saved_doc_string
-						      + start,
+			  return make_unibyte_string (saved + start,
 						      to - start);
 			}
 		      else




             reply	other threads:[~2013-02-09  5:13 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-09  5:13 Stefan Monnier [this message]
2013-02-09  9:06 ` Loading dynamic docstrings and bytecode more lazily Helmut Eller
2013-02-09 11:13 ` Lluís
2013-02-09 20:09 ` Glenn Morris
2013-02-10  2:02   ` Stefan Monnier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=jwvwqui3wsz.fsf-monnier+emacs@gnu.org \
    --to=monnier@iro.umontreal.ca \
    --cc=emacs-devel@gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this external index

	https://git.savannah.gnu.org/cgit/emacs.git
	https://git.savannah.gnu.org/cgit/emacs/org-mode.git

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.