unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
* Not using DOC for ELisp files
@ 2021-12-28  1:48 Stefan Monnier
  2021-12-28  2:25 ` Po Lu
                   ` (4 more replies)
  0 siblings, 5 replies; 31+ messages in thread
From: Stefan Monnier @ 2021-12-28  1:48 UTC (permalink / raw)
  To: emacs-devel

The patch below removes from the DOC file the docstrings coming from
ELisp files.

In numbers (in my tests), this reduces the size of DOC from about 3.2MB
to about 850kB, and increases the pdmp size by about 53kB or about 0.7%.

The benefits aren't great, obviously, but it does remove some warts in
the `lread.c` code (into which I bumped in the `scratch/fcr` branch,
which is why this comes up now).

I have a vague recollection that such a change was mentioned in some
bugreport-discussion some months ago but I can't remember where.

Comments?


        Stefan


diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c
index d17c28be90..4f4b135589 100644
--- a/lib-src/make-docfile.c
+++ b/lib-src/make-docfile.c
@@ -20,7 +20,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 
 
 /* The arguments given to this program are all the C and Lisp source files
- of GNU Emacs.  .elc and .el and .c files are allowed.
+ of GNU Emacs.  .c files are allowed.
  A .o file can also be specified; the .c file it was made from is used.
  This helps the makefile pass the correct list of files.
  Option -d DIR means change to DIR before looking for files.
@@ -62,13 +62,9 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    Similarly, msdos defines this as sys_chdir, but we're not linking with the
    file where that function is defined.  */
 #undef chdir
-#define IS_SLASH(c)  ((c) == '/' || (c) == '\\' || (c) == ':')
-#else  /* not DOS_NT */
-#define IS_SLASH(c)  ((c) == '/')
 #endif /* not DOS_NT */
 
 static void scan_file (char *filename);
-static void scan_lisp_file (const char *filename, const char *mode);
 static void scan_c_file (char *filename, const char *mode);
 static void scan_c_stream (FILE *infile);
 static void start_globals (void);
@@ -238,16 +234,9 @@ put_filename (char *filename)
 static void
 scan_file (char *filename)
 {
-  ptrdiff_t len = strlen (filename);
-
   if (!generate_globals)
     put_filename (filename);
-  if (len > 4 && !strcmp (filename + len - 4, ".elc"))
-    scan_lisp_file (filename, "rb");
-  else if (len > 3 && !strcmp (filename + len - 3, ".el"))
-    scan_lisp_file (filename, "r");
-  else
-    scan_c_file (filename, "r");
+  scan_c_file (filename, "r");
 }
 
 static void
@@ -1225,453 +1214,4 @@ scan_c_stream (FILE *infile)
     fatal ("read error");
 }
 \f
-/* Read a file of Lisp code, compiled or interpreted.
- Looks for
-  (defun NAME ARGS DOCSTRING ...)
-  (defmacro NAME ARGS DOCSTRING ...)
-  (defsubst NAME ARGS DOCSTRING ...)
-  (autoload (quote NAME) FILE DOCSTRING ...)
-  (defvar NAME VALUE DOCSTRING)
-  (defconst NAME VALUE DOCSTRING)
-  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
-  (fset (quote NAME) #[... DOCSTRING ...])
-  (defalias (quote NAME) #[... DOCSTRING ...])
-  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
- starting in column zero.
- (quote NAME) may appear as 'NAME as well.
-
- We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
- When we find that, we save it for the following defining-form,
- and we use that instead of reading a doc string within that defining-form.
-
- For defvar, defconst, and fset we skip to the docstring with a kludgy
- formatting convention: all docstrings must appear on the same line as the
- initial open-paren (the one in column zero) and must contain a backslash
- and a newline immediately after the initial double-quote.  No newlines
- must appear between the beginning of the form and the first double-quote.
- For defun, defmacro, and autoload, we know how to skip over the
- arglist, but the doc string must still have a backslash and newline
- immediately after the double quote.
- The only source files that must follow this convention are preloaded
- uncompiled ones like loaddefs.el; aside from that, it is always the .elc
- file that we should look at, and they are no problem because byte-compiler
- output follows this convention.
- The NAME and DOCSTRING are output.
- NAME is preceded by `F' for a function or `V' for a variable.
- An entry is output only if DOCSTRING has \ newline just after the opening ".
- */
-
-static void
-skip_white (FILE *infile)
-{
-  int c;
-  do
-    c = getc (infile);
-  while (c_isspace (c));
-
-  ungetc (c, infile);
-}
-
-static void
-read_lisp_symbol (FILE *infile, char *buffer)
-{
-  int c;
-  char *fillp = buffer;
-
-  skip_white (infile);
-  while (true)
-    {
-      c = getc (infile);
-      if (c == '\\')
-	{
-	  c = getc (infile);
-	  if (c < 0)
-	    return;
-	  *fillp++ = c;
-	}
-      else if (c_isspace (c) || c == '(' || c == ')' || c < 0)
-	{
-	  ungetc (c, infile);
-	  *fillp = 0;
-	  break;
-	}
-      else
-	*fillp++ = c;
-    }
-
-  if (! buffer[0])
-    fprintf (stderr, "## expected a symbol, got '%c'\n", c);
-
-  skip_white (infile);
-}
-
-static bool
-search_lisp_doc_at_eol (FILE *infile)
-{
-  int c = 0, c1 = 0, c2 = 0;
-
-  /* Skip until the end of line; remember two previous chars.  */
-  while (c != '\n' && c != '\r' && c != EOF)
-    {
-      c2 = c1;
-      c1 = c;
-      c = getc (infile);
-    }
-
-  /* If two previous characters were " and \,
-     this is a doc string.  Otherwise, there is none.  */
-  if (c2 != '"' || c1 != '\\')
-    {
-#ifdef DEBUG
-      fprintf (stderr, "## non-docstring found\n");
-#endif
-      ungetc (c, infile);
-      return false;
-    }
-  return true;
-}
-
-#define DEF_ELISP_FILE(fn)  { #fn, sizeof(#fn) - 1 }
-
-static void
-scan_lisp_file (const char *filename, const char *mode)
-{
-  FILE *infile;
-  int c;
-  char *saved_string = 0;
-  /* These are the only files that are loaded uncompiled, and must
-     follow the conventions of the doc strings expected by this
-     function.  These conventions are automatically followed by the
-     byte compiler when it produces the .elc files.  */
-  static struct {
-    const char *fn;
-    int fl;
-  } const uncompiled[] = {
-    DEF_ELISP_FILE (loaddefs.el),
-    DEF_ELISP_FILE (loadup.el),
-    DEF_ELISP_FILE (charprop.el),
-    DEF_ELISP_FILE (cp51932.el),
-    DEF_ELISP_FILE (eucjp-ms.el)
-  };
-  int i;
-  int flen = strlen (filename);
-
-  if (generate_globals)
-    fatal ("scanning lisp file when -g specified");
-  if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
-    {
-      bool match = false;
-      for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
-	{
-	  if (uncompiled[i].fl <= flen
-	      && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
-	      && (flen == uncompiled[i].fl
-		  || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
-	    {
-	      match = true;
-	      break;
-	    }
-	}
-      if (!match)
-	fatal ("uncompiled lisp file %s is not supported", filename);
-    }
-
-  infile = fopen (filename, mode);
-  if (infile == NULL)
-    {
-      perror (filename);
-      exit (EXIT_FAILURE);
-    }
-
-  c = '\n';
-  while (!feof (infile))
-    {
-      char buffer[BUFSIZ];
-      char type;
-
-      /* If not at end of line, skip till we get to one.  */
-      if (c != '\n' && c != '\r')
-	{
-	  c = getc (infile);
-	  continue;
-	}
-      /* Skip the line break.  */
-      while (c == '\n' || c == '\r')
-	c = getc (infile);
-      /* Detect a dynamic doc string and save it for the next expression.  */
-      if (c == '#')
-	{
-	  c = getc (infile);
-	  if (c == '@')
-	    {
-	      ptrdiff_t length = 0;
-	      ptrdiff_t i;
-
-	      /* Read the length.  */
-	      while ((c = getc (infile),
-		      c_isdigit (c)))
-		{
-		  if (INT_MULTIPLY_WRAPV (length, 10, &length)
-		      || INT_ADD_WRAPV (length, c - '0', &length)
-		      || SIZE_MAX < length)
-		    memory_exhausted ();
-		}
-
-	      if (length <= 1)
-		fatal ("invalid dynamic doc string length");
-
-	      if (c != ' ')
-		fatal ("space not found after dynamic doc string length");
-
-	      /* The next character is a space that is counted in the length
-		 but not part of the doc string.
-		 We already read it, so just ignore it.  */
-	      length--;
-
-	      /* Read in the contents.  */
-	      free (saved_string);
-	      saved_string = xmalloc (length);
-	      for (i = 0; i < length; i++)
-		saved_string[i] = getc (infile);
-	      /* The last character is a ^_.
-		 That is needed in the .elc file
-		 but it is redundant in DOC.  So get rid of it here.  */
-	      saved_string[length - 1] = 0;
-	      /* Skip the line break.  */
-	      while (c == '\n' || c == '\r')
-		c = getc (infile);
-	      /* Skip the following line.  */
-	      while (! (c == '\n' || c == '\r' || c < 0))
-		c = getc (infile);
-	    }
-	  continue;
-	}
-
-      if (c != '(')
-	continue;
-
-      read_lisp_symbol (infile, buffer);
-
-      if (! strcmp (buffer, "defun")
-	  || ! strcmp (buffer, "defmacro")
-	  || ! strcmp (buffer, "defsubst"))
-	{
-	  type = 'F';
-	  read_lisp_symbol (infile, buffer);
-
-	  /* Skip the arguments: either "nil" or a list in parens.  */
-
-	  c = getc (infile);
-	  if (c == 'n') /* nil */
-	    {
-	      if ((c = getc (infile)) != 'i'
-		  || (c = getc (infile)) != 'l')
-		{
-		  fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-			   buffer, filename);
-		  continue;
-		}
-	    }
-	  else if (c != '(')
-	    {
-	      fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-		       buffer, filename);
-	      continue;
-	    }
-	  else
-	    while (! (c == ')' || c < 0))
-	      c = getc (infile);
-	  skip_white (infile);
-
-	  /* If the next three characters aren't `dquote bslash newline'
-	     then we're not reading a docstring.
-	   */
-	  if ((c = getc (infile)) != '"'
-	      || (c = getc (infile)) != '\\'
-	      || ((c = getc (infile)) != '\n' && c != '\r'))
-	    {
-#ifdef DEBUG
-	      fprintf (stderr, "## non-docstring in %s (%s)\n",
-		       buffer, filename);
-#endif
-	      continue;
-	    }
-	}
-
-      /* defcustom can only occur in uncompiled Lisp files.  */
-      else if (! strcmp (buffer, "defvar")
-	       || ! strcmp (buffer, "defconst")
-	       || ! strcmp (buffer, "defcustom"))
-	{
-	  type = 'V';
-	  read_lisp_symbol (infile, buffer);
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "custom-declare-variable")
-	       || ! strcmp (buffer, "defvaralias")
-	       )
-	{
-	  type = 'V';
-
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr,
-			   "## unparsable name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr,
-			   "## unparsable name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
-	{
-	  type = 'F';
-
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr, "## unparsable name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr, "## unparsable name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "autoload"))
-	{
-	  type = 'F';
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr, "## unparsable name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr, "## unparsable name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-	  skip_white (infile);
-	  c = getc (infile);
-	  if (c != '\"')
-	    {
-	      fprintf (stderr, "## autoload of %s unparsable (%s)\n",
-		       buffer, filename);
-	      continue;
-	    }
-	  read_c_string_or_comment (infile, 0, false, 0);
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-#ifdef DEBUG
-      else if (! strcmp (buffer, "if")
-	       || ! strcmp (buffer, "byte-code"))
-	continue;
-#endif
-
-      else
-	{
-#ifdef DEBUG
-	  fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
-		   buffer, filename);
-#endif
-	  continue;
-	}
-
-      /* At this point, we should either use the previous dynamic doc string in
-	 saved_string or gobble a doc string from the input file.
-	 In the latter case, the opening quote (and leading backslash-newline)
-	 have already been read.  */
-
-      printf ("\037%c%s\n", type, buffer);
-      if (saved_string)
-	{
-	  fputs (saved_string, stdout);
-	  /* Don't use one dynamic doc string twice.  */
-	  free (saved_string);
-	  saved_string = 0;
-	}
-      else
-	read_c_string_or_comment (infile, 1, false, 0);
-    }
-  free (saved_string);
-  if (ferror (infile) || fclose (infile) != 0)
-    fatal ("%s: read error", filename);
-}
-
-
 /* make-docfile.c ends here */
diff --git a/lisp/startup.el b/lisp/startup.el
index b79467339b..727432a4cb 100644
--- a/lisp/startup.el
+++ b/lisp/startup.el
@@ -1056,6 +1056,9 @@ startup--load-user-init-file
     (when debug-on-error-should-be-set
       (setq debug-on-error debug-on-error-from-init-file))))
 
+(defvar lisp-directory nil
+  "Directory containing the Lisp files that come with GNU Emacs.")
+
 (defun command-line ()
   "A subroutine of `normal-top-level'.
 Amongst another things, it parses the command-line arguments."
@@ -1087,8 +1090,7 @@ command-line
   (let ((simple-file-name
 	 ;; Look for simple.el or simple.elc and use their directory
 	 ;; as the place where all Lisp files live.
-	 (locate-file "simple" load-path (get-load-suffixes)))
-	lisp-dir)
+	 (locate-file "simple" load-path (get-load-suffixes))))
     ;; Don't abort if simple.el cannot be found, but print a warning.
     ;; Although in most usage we are going to cryptically abort a moment
     ;; later anyway, due to missing required bidi data files (eg bug#13430).
@@ -1104,12 +1106,13 @@ command-line
 	  (unless (file-readable-p lispdir)
 	    (princ (format "Lisp directory %s not readable?" lispdir))
 	    (terpri)))
-      (setq lisp-dir (file-truename (file-name-directory simple-file-name)))
+      (setq lisp-directory
+            (file-truename (file-name-directory simple-file-name)))
       (setq load-history
 	    (mapcar (lambda (elt)
 		      (if (and (stringp (car elt))
 			       (not (file-name-absolute-p (car elt))))
-			  (cons (concat lisp-dir
+			  (cons (concat lisp-directory
 					(car elt))
 				(cdr elt))
 			elt))
diff --git a/src/Makefile.in b/src/Makefile.in
index ea4a7207ff..76e4675c2a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -642,13 +642,11 @@ $(pdmp):
 ## for the first time, this prevents any variation between configurations
 ## in the contents of the DOC file.
 ##
-$(etc)/DOC: lisp.mk $(libsrc)/make-docfile$(EXEEXT) $(doc_obj) $(lisp)
+$(etc)/DOC: $(libsrc)/make-docfile$(EXEEXT) $(doc_obj)
 	$(AM_V_GEN)$(MKDIR_P) $(etc)
 	$(AM_V_at)rm -f $(etc)/DOC
 	$(AM_V_at)$(libsrc)/make-docfile -d $(srcdir) \
 	  $(SOME_MACHINE_OBJECTS) $(doc_obj) > $(etc)/DOC
-	$(AM_V_at)$(libsrc)/make-docfile -a $(etc)/DOC -d $(lispsource) \
-	  $(shortlisp)
 
 $(libsrc)/make-docfile$(EXEEXT) $(libsrc)/make-fingerprint$(EXEEXT): \
   $(lib)/libgnu.a
diff --git a/src/doc.c b/src/doc.c
index 6be023bb93..17601c700e 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -84,16 +84,19 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   char *from, *to, *name, *p, *p1;
   Lisp_Object file, pos;
   ptrdiff_t count = SPECPDL_INDEX ();
+  Lisp_Object dir;
   USE_SAFE_ALLOCA;
 
   if (FIXNUMP (filepos))
     {
       file = Vdoc_file_name;
+      dir = Vdoc_directory;
       pos = filepos;
     }
   else if (CONSP (filepos))
     {
       file = XCAR (filepos);
+      dir = Fsymbol_value (intern ("lisp-directory"));
       pos = XCDR (filepos);
     }
   else
@@ -101,7 +104,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
 
   EMACS_INT position = eabs (XFIXNUM (pos));
 
-  if (!STRINGP (Vdoc_directory))
+  if (!STRINGP (dir))
     return Qnil;
 
   if (!STRINGP (file))
@@ -113,7 +116,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   Lisp_Object tem = Ffile_name_absolute_p (file);
   file = ENCODE_FILE (file);
   Lisp_Object docdir
-    = NILP (tem) ? ENCODE_FILE (Vdoc_directory) : empty_unibyte_string;
+    = NILP (tem) ? ENCODE_FILE (dir) : empty_unibyte_string;
   ptrdiff_t docdir_sizemax = SBYTES (docdir) + 1;
   if (will_dump_p ())
     docdir_sizemax = max (docdir_sizemax, sizeof sibling_etc);
diff --git a/src/lread.c b/src/lread.c
index 4992576414..55b3d473dc 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1545,7 +1545,7 @@ Return t if the file exists and loads successfully.  */)
 	message_with_string ("Loading %s...", file, 1);
     }
 
-  specbind (Qload_file_name, found_eff);
+  specbind (Qload_file_name, hist_file_name);
   specbind (Qload_true_file_name, found);
   specbind (Qinhibit_file_name_operation, Qnil);
   specbind (Qload_in_progress, Qt);
@@ -3224,23 +3224,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 		    Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
 	    }
 
-	  if (COMPILED_DOC_STRING < ASIZE (tmp)
-	      && EQ (AREF (tmp, COMPILED_DOC_STRING), make_fixnum (0)))
-	    {
-	      /* read_list found a docstring like '(#$ . 5521)' and treated it
-		 as 0.  This placeholder 0 would lead to accidental sharing in
-		 purecopy's hash-consing, so replace it with a (hopefully)
-		 unique integer placeholder, which is negative so that it is
-		 not confused with a DOC file offset (the USE_LSB_TAG shift
-		 relies on the fact that VALMASK is one bit narrower than
-		 INTMASK).  Eventually Snarf-documentation should replace the
-		 placeholder with the actual docstring.  */
-	      verify (INTMASK & ~VALMASK);
-	      EMACS_UINT hash = ((XHASH (tmp) >> USE_LSB_TAG)
-				 | (INTMASK - INTMASK / 2));
-	      ASET (tmp, COMPILED_DOC_STRING, make_ufixnum (hash));
-	    }
-
 	  XSETPVECTYPE (vec, PVEC_COMPILED);
 	  return tmp;
 	}
@@ -4208,31 +4191,13 @@ read_list (bool flag, Lisp_Object readcharfun)
 
       /* While building, if the list starts with #$, treat it specially.  */
       if (EQ (elt, Vload_file_name)
-	  && ! NILP (elt)
-	  && !NILP (Vpurify_flag))
+	  && ! NILP (elt))
 	{
-	  if (NILP (Vdoc_file_name))
-	    /* We have not yet called Snarf-documentation, so assume
-	       this file is described in the DOC file
-	       and Snarf-documentation will fill in the right value later.
-	       For now, replace the whole list with 0.  */
-	    doc_reference = 1;
-	  else
-	    /* We have already called Snarf-documentation, so make a relative
-	       file name for this file, so it can be found properly
-	       in the installed Lisp directory.
-	       We don't use Fexpand_file_name because that would make
-	       the directory absolute now.  */
-	    {
-	      AUTO_STRING (dot_dot_lisp, "../lisp/");
-	      elt = concat2 (dot_dot_lisp, Ffile_name_nondirectory (elt));
-	    }
+	  if (!NILP (Vpurify_flag))
+	    doc_reference = 0;
+	  else if (load_force_doc_strings)
+	    doc_reference = 2;
 	}
-      else if (EQ (elt, Vload_file_name)
-	       && ! NILP (elt)
-	       && load_force_doc_strings)
-	doc_reference = 2;
-
       if (ch)
 	{
 	  if (flag > 0)
@@ -4253,8 +4218,6 @@ read_list (bool flag, Lisp_Object readcharfun)
 
 	      if (ch == ')')
 		{
-		  if (doc_reference == 1)
-		    return make_fixnum (0);
 		  if (doc_reference == 2 && FIXNUMP (XCDR (val)))
 		    {
 		      char *saved = NULL;




^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
@ 2021-12-28  2:25 ` Po Lu
  2021-12-28  3:48   ` Stefan Kangas
                     ` (2 more replies)
  2021-12-28  3:39 ` Stefan Kangas
                   ` (3 subsequent siblings)
  4 siblings, 3 replies; 31+ messages in thread
From: Po Lu @ 2021-12-28  2:25 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> I have a vague recollection that such a change was mentioned in some
> bugreport-discussion some months ago but I can't remember where.
>
> Comments?

BTW, while we're removing things like this, how about getting rid of
pure space?  Is it really necessary in this millennium, even for the
unexec build?

That should work without pure space, right?

It would make the incremental garbage collector I'm slowly working
towards somewhat simpler.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
  2021-12-28  2:25 ` Po Lu
@ 2021-12-28  3:39 ` Stefan Kangas
  2021-12-28  5:10   ` Stefan Monnier
  2021-12-28  6:56 ` Lars Ingebrigtsen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 31+ messages in thread
From: Stefan Kangas @ 2021-12-28  3:39 UTC (permalink / raw)
  To: Stefan Monnier, emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> I have a vague recollection that such a change was mentioned in some
> bugreport-discussion some months ago but I can't remember where.

Bug#27748, maybe?

> Comments?

FWIW, I'm all for this simplification.

What about the comment about DOC in bytecomp.el?  Should that be removed
as well?



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  2:25 ` Po Lu
@ 2021-12-28  3:48   ` Stefan Kangas
  2021-12-28  5:39     ` Po Lu
  2021-12-28  4:11   ` LdBeth
  2021-12-28  7:10   ` Lars Ingebrigtsen
  2 siblings, 1 reply; 31+ messages in thread
From: Stefan Kangas @ 2021-12-28  3:48 UTC (permalink / raw)
  To: Po Lu, Stefan Monnier; +Cc: emacs-devel

Po Lu <luangruo@yahoo.com> writes:

> BTW, while we're removing things like this, how about getting rid of
> pure space?  Is it really necessary in this millennium, even for the
> unexec build?

See Bug#36649.

> It would make the incremental garbage collector I'm slowly working
> towards somewhat simpler.

Cool!  An incremental GC would be great, to say the least.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  2:25 ` Po Lu
  2021-12-28  3:48   ` Stefan Kangas
@ 2021-12-28  4:11   ` LdBeth
  2021-12-28  5:03     ` Stefan Monnier
                       ` (2 more replies)
  2021-12-28  7:10   ` Lars Ingebrigtsen
  2 siblings, 3 replies; 31+ messages in thread
From: LdBeth @ 2021-12-28  4:11 UTC (permalink / raw)
  To: Po Lu; +Cc: Stefan Monnier, emacs-devel

>>>>> In <871r1xzcrl.fsf@yahoo.com> 
>>>>>	Po Lu <luangruo@yahoo.com> wrote:
Po> BTW, while we're removing things like this, how about getting rid
Po> of pure space?  Is it really necessary in this millennium, even
Po> for the unexec build?

Po> That should work without pure space, right?

Po> It would make the incremental garbage collector I'm slowly working
Po> towards somewhat simpler.

Having some data declared to be static can speed up GC scanning. It
no longer really matters for sharing pure storage across different
emacs processes since today's emacsists get plenty of RAM IMO.
Probably that could be dropped in favor of a generational GC.

-- 
LDB



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  4:11   ` LdBeth
@ 2021-12-28  5:03     ` Stefan Monnier
  2021-12-28  5:38     ` Po Lu
  2021-12-28  9:52     ` Phil Sainty
  2 siblings, 0 replies; 31+ messages in thread
From: Stefan Monnier @ 2021-12-28  5:03 UTC (permalink / raw)
  To: LdBeth; +Cc: Po Lu, emacs-devel

> Having some data declared to be static can speed up GC scanning.

But ever since the switch to pdumper, the purespace is not treated
specially any more (i.e. it's scanned along with the rest of the heap),
so we already lost this benefit.


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  3:39 ` Stefan Kangas
@ 2021-12-28  5:10   ` Stefan Monnier
  0 siblings, 0 replies; 31+ messages in thread
From: Stefan Monnier @ 2021-12-28  5:10 UTC (permalink / raw)
  To: Stefan Kangas; +Cc: emacs-devel

>> I have a vague recollection that such a change was mentioned in some
>> bugreport-discussion some months ago but I can't remember where.
> Bug#27748, maybe?

That's the one, thanks.

> What about the comment about DOC in bytecomp.el?  Should that be removed
> as well?

Maybe not removed, but yes, I should adjust it accordingly.


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  4:11   ` LdBeth
  2021-12-28  5:03     ` Stefan Monnier
@ 2021-12-28  5:38     ` Po Lu
  2021-12-28  9:52     ` Phil Sainty
  2 siblings, 0 replies; 31+ messages in thread
From: Po Lu @ 2021-12-28  5:38 UTC (permalink / raw)
  To: LdBeth; +Cc: Stefan Monnier, emacs-devel

LdBeth <andpuke@foxmail.com> writes:

> Having some data declared to be static can speed up GC scanning.

I doubt it's significant.

> Probably that could be dropped in favor of a generational GC.

My opinion is that pure space constitutes a very primitive form of
generational garbage collection, where purified data is a permanent
"old" generation.

However, I doubt that gives any perceivable benefit to GC time today.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  3:48   ` Stefan Kangas
@ 2021-12-28  5:39     ` Po Lu
  0 siblings, 0 replies; 31+ messages in thread
From: Po Lu @ 2021-12-28  5:39 UTC (permalink / raw)
  To: Stefan Kangas; +Cc: Stefan Monnier, emacs-devel

Stefan Kangas <stefankangas@gmail.com> writes:

> See Bug#36649.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
  2021-12-28  2:25 ` Po Lu
  2021-12-28  3:39 ` Stefan Kangas
@ 2021-12-28  6:56 ` Lars Ingebrigtsen
  2021-12-28 12:44 ` Eli Zaretskii
  2022-01-03 13:48 ` Ken Raeburn
  4 siblings, 0 replies; 31+ messages in thread
From: Lars Ingebrigtsen @ 2021-12-28  6:56 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

Stefan Monnier <monnier@iro.umontreal.ca> writes:

> The patch below removes from the DOC file the docstrings coming from
> ELisp files.

Makes sense to me.

-- 
(domestic pets only, the antidote for overdose, milk.)
   bloggy blog: http://lars.ingebrigtsen.no



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  2:25 ` Po Lu
  2021-12-28  3:48   ` Stefan Kangas
  2021-12-28  4:11   ` LdBeth
@ 2021-12-28  7:10   ` Lars Ingebrigtsen
  2 siblings, 0 replies; 31+ messages in thread
From: Lars Ingebrigtsen @ 2021-12-28  7:10 UTC (permalink / raw)
  To: Po Lu; +Cc: Stefan Monnier, emacs-devel

Po Lu <luangruo@yahoo.com> writes:

> BTW, while we're removing things like this, how about getting rid of
> pure space?  Is it really necessary in this millennium, even for the
> unexec build?

Yes, we should get rid of pure space.

> It would make the incremental garbage collector I'm slowly working
> towards somewhat simpler.

Wow, having incremental gc would be great.  😀

-- 
(domestic pets only, the antidote for overdose, milk.)
   bloggy blog: http://lars.ingebrigtsen.no



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  4:11   ` LdBeth
  2021-12-28  5:03     ` Stefan Monnier
  2021-12-28  5:38     ` Po Lu
@ 2021-12-28  9:52     ` Phil Sainty
  2021-12-28 10:31       ` Po Lu
  2 siblings, 1 reply; 31+ messages in thread
From: Phil Sainty @ 2021-12-28  9:52 UTC (permalink / raw)
  To: LdBeth; +Cc: Po Lu, Stefan Monnier, emacs-devel

On 2021-12-28 17:11, LdBeth wrote:
> Probably that could be dropped in favor of a generational GC.

I believe that Jeff Walsh is currently continuing work on the
generational GC started by Daniel Colascione.

* https://lists.gnu.org/archive/html/emacs-devel/2021-06/msg00175.html
* https://github.com/fejfighter/emacs/commits/feature/newgc




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  9:52     ` Phil Sainty
@ 2021-12-28 10:31       ` Po Lu
  2021-12-28 12:47         ` Po Lu
  0 siblings, 1 reply; 31+ messages in thread
From: Po Lu @ 2021-12-28 10:31 UTC (permalink / raw)
  To: Phil Sainty; +Cc: LdBeth, Stefan Monnier, emacs-devel

Phil Sainty <psainty@orcon.net.nz> writes:

> I believe that Jeff Walsh is currently continuing work on the
> generational GC started by Daniel Colascione.

I don't think a purely generational GC is the way to go.  It's
inappropriate for an interactive program like Emacs, where
responsiveness is more important than raw GC performance.

Besides, the goal of my garbage collector is to be minimally invasive.

It extends the existing mark-and-sweep garbage collector to act
incrementally, and makes use of hardware write barriers.  (And as such
is unlikely to work on machines that don't support VM and something like
`mprotect'.)

It's still a work-in-progress with some features completely missing, and
it also has a lot of low-hanging optimization fruit.  For example, when
a write barrier is hit, the faulting address is looked up inside
mem_root, and the object found is simply pushed back onto the scan
stack.  (Nothing fancy is done with objects such as conses, where
chances are the object being written to it can be immediately marked
because it will not be modified further.)

Incremental marking already works in undumped Emacs, except for a tough
bug involving car-nested conses that are scanned from intervals on the
stack that cross a page, but I haven't worked on incremental sweeping
yet, and it doesn't work in a dumped Emacs at all.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
                   ` (2 preceding siblings ...)
  2021-12-28  6:56 ` Lars Ingebrigtsen
@ 2021-12-28 12:44 ` Eli Zaretskii
  2021-12-28 17:14   ` Stefan Monnier
  2022-01-03 13:48 ` Ken Raeburn
  4 siblings, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-28 12:44 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Date: Mon, 27 Dec 2021 20:48:22 -0500
> 
> The patch below removes from the DOC file the docstrings coming from
> ELisp files.

What does this mean in terms of the run-time requirements in order to
get the doc strings for preloaded files?  Does it mean their *.elc
files should be available from the installation tree?  Or does it mean
the *.el files (which might be compressed) must be available?  And
what does this mean for a natively-compiled installation?

If there are any non-trivial consequences of this change, i.e. distros
will now need to make sure something happens that they didn't before,
I think we should get their opinions before we install this.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28 10:31       ` Po Lu
@ 2021-12-28 12:47         ` Po Lu
  0 siblings, 0 replies; 31+ messages in thread
From: Po Lu @ 2021-12-28 12:47 UTC (permalink / raw)
  To: Phil Sainty; +Cc: LdBeth, Stefan Monnier, emacs-devel

Po Lu <luangruo@yahoo.com> writes:

> Incremental marking already works in undumped Emacs, except for a tough
> bug involving car-nested conses that are scanned from intervals on the
                                                                  ^^

Sorry for the typo, this should have been "reachable from".



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28 12:44 ` Eli Zaretskii
@ 2021-12-28 17:14   ` Stefan Monnier
  2021-12-28 18:17     ` Eli Zaretskii
  0 siblings, 1 reply; 31+ messages in thread
From: Stefan Monnier @ 2021-12-28 17:14 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

>> The patch below removes from the DOC file the docstrings coming from
>> ELisp files.
> What does this mean in terms of the run-time requirements in order to
> get the doc strings for preloaded files?  Does it mean their *.elc
> files should be available from the installation tree?

Yes, it means the docstrings are fetched from the likes of
`lisp/files.elc` (as is already the case for the non-preloaded files)
instead of from `etc/DOC`.

> Or does it mean the *.el files (which might be compressed) must
> be available?

No, the `.el` files are not used any more than before.

> And what does this mean for a natively-compiled installation?

I don't think this has any impact.

> If there are any non-trivial consequences of this change, i.e. distros
> will now need to make sure something happens that they didn't before,
> I think we should get their opinions before we install this.

The only non-trivial consequence I can think of is when the compiled
preloaded files are missing.  E.g. if the distros remove
`lisp/files.elc`.  I'm not aware of any distro/packaging doing it (even
tho it did make sense doing so to save a few bytes).


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28 17:14   ` Stefan Monnier
@ 2021-12-28 18:17     ` Eli Zaretskii
  2021-12-29  0:15       ` Stefan Monnier
  0 siblings, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-28 18:17 UTC (permalink / raw)
  To: Stefan Monnier, Andrea Corallo; +Cc: emacs-devel

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: emacs-devel@gnu.org
> Date: Tue, 28 Dec 2021 12:14:43 -0500
> 
> Yes, it means the docstrings are fetched from the likes of
> `lisp/files.elc` (as is already the case for the non-preloaded files)
> instead of from `etc/DOC`.
> 
> > Or does it mean the *.el files (which might be compressed) must
> > be available?
> 
> No, the `.el` files are not used any more than before.
> 
> > And what does this mean for a natively-compiled installation?
> 
> I don't think this has any impact.

So natively-compiled installations still need to make the *.elc files
available in the installation tree?

Andrea, is that a requirement regardless of the doc strings?  That is,
can one remove the *.elc files of the preloaded Lisp files once Emacs
is installed and the preloaded *.eln files are available?

> > If there are any non-trivial consequences of this change, i.e. distros
> > will now need to make sure something happens that they didn't before,
> > I think we should get their opinions before we install this.
> 
> The only non-trivial consequence I can think of is when the compiled
> preloaded files are missing.  E.g. if the distros remove
> `lisp/files.elc`.  I'm not aware of any distro/packaging doing it (even
> tho it did make sense doing so to save a few bytes).

Well, I hope those of them who read this list will chime in if this is
of any concern.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28 18:17     ` Eli Zaretskii
@ 2021-12-29  0:15       ` Stefan Monnier
  2021-12-29 12:30         ` Johann Klähn
  2021-12-29 12:52         ` Eli Zaretskii
  0 siblings, 2 replies; 31+ messages in thread
From: Stefan Monnier @ 2021-12-29  0:15 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: Andrea Corallo, emacs-devel

Seeing how I haven't heard any opposition to the idea, I fixed a few
loose ends, and I think it's now ready.  See below.
Any objection?


        Stefan


2021-12-28  Stefan Monnier  <monnier@iro.umontreal.ca>

    * src/lread.c (Fload): Use relative file names for `load-file-name`
    when preloading for the dump, like we already did for `current-load-list`.
    (read_list): Don't zero-out dynamic docstring references during the
    preload since they won't be filled later by Snarf-documentation any more.
    (read1): Remove the hash-hack for doc references that were zeroed.

    * lisp/startup.el (lisp-directory): New variable.
    (command-line): Set it.

    * src/doc.c (get_doc_string): Use `lisp-directory` for dynamic
    docstring references using relative file names.
    (syms_of_doc): Add `Qlisp_directory`.

    * src/Makefile.in ($(etc)/DOC): Don't scan Lisp files any more.

    * lib-src/make-docfile.c (scan_file): Don't handle `.el` or `.elc`
    files any more.
    (IS_SLASH): Remove macro, not used any more.
    (skip_white, read_lisp_symbol, search_lisp_doc_at_eol)
    (scan_lisp_file): Remove functions, not used any more.

    * doc/lispref/loading.texi (Library Search): Mention `lisp-directory`.


diff --git a/doc/lispref/loading.texi b/doc/lispref/loading.texi
index e4cd940ab2..e6a7d77ac9 100644
--- a/doc/lispref/loading.texi
+++ b/doc/lispref/loading.texi
@@ -292,13 +292,16 @@ Library Search
 @end defvar
 
   When Emacs starts up, it sets up the value of @code{load-path}
-in several steps.  First, it initializes @code{load-path} using
-default locations set when Emacs was compiled.  Normally, this
-is a directory something like
+in several steps.  First, it initializes @code{lisp-directory} using
+default locations set when Emacs was compiled.
 
+@defvar lisp-directory
+Name of the directory holding Emacs's bundled Lisp files.
+Normally, this is a directory something like
 @example
 "/usr/local/share/emacs/@var{version}/lisp"
 @end example
+@end defvar
 
 (In this and the following examples, replace @file{/usr/local} with
 the installation prefix appropriate for your Emacs.)
diff --git a/etc/NEWS b/etc/NEWS
index 96e95967ef..3363ce0371 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -883,6 +883,9 @@ The input must be encoded text.
 \f
 * Lisp Changes in Emacs 29.1
 
++++
+** New variable 'lisp-directory' holds the directory of Emacs's own Lisp files.
+
 +++
 ** New facility for handling session state: 'multisession-value'.
 This can be used as a convenient way to store (simple) application
diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c
index d17c28be90..4f4b135589 100644
--- a/lib-src/make-docfile.c
+++ b/lib-src/make-docfile.c
@@ -20,7 +20,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 
 
 /* The arguments given to this program are all the C and Lisp source files
- of GNU Emacs.  .elc and .el and .c files are allowed.
+ of GNU Emacs.  .c files are allowed.
  A .o file can also be specified; the .c file it was made from is used.
  This helps the makefile pass the correct list of files.
  Option -d DIR means change to DIR before looking for files.
@@ -62,13 +62,9 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    Similarly, msdos defines this as sys_chdir, but we're not linking with the
    file where that function is defined.  */
 #undef chdir
-#define IS_SLASH(c)  ((c) == '/' || (c) == '\\' || (c) == ':')
-#else  /* not DOS_NT */
-#define IS_SLASH(c)  ((c) == '/')
 #endif /* not DOS_NT */
 
 static void scan_file (char *filename);
-static void scan_lisp_file (const char *filename, const char *mode);
 static void scan_c_file (char *filename, const char *mode);
 static void scan_c_stream (FILE *infile);
 static void start_globals (void);
@@ -238,16 +234,9 @@ put_filename (char *filename)
 static void
 scan_file (char *filename)
 {
-  ptrdiff_t len = strlen (filename);
-
   if (!generate_globals)
     put_filename (filename);
-  if (len > 4 && !strcmp (filename + len - 4, ".elc"))
-    scan_lisp_file (filename, "rb");
-  else if (len > 3 && !strcmp (filename + len - 3, ".el"))
-    scan_lisp_file (filename, "r");
-  else
-    scan_c_file (filename, "r");
+  scan_c_file (filename, "r");
 }
 
 static void
@@ -1225,453 +1214,4 @@ scan_c_stream (FILE *infile)
     fatal ("read error");
 }
 \f
-/* Read a file of Lisp code, compiled or interpreted.
- Looks for
-  (defun NAME ARGS DOCSTRING ...)
-  (defmacro NAME ARGS DOCSTRING ...)
-  (defsubst NAME ARGS DOCSTRING ...)
-  (autoload (quote NAME) FILE DOCSTRING ...)
-  (defvar NAME VALUE DOCSTRING)
-  (defconst NAME VALUE DOCSTRING)
-  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
-  (fset (quote NAME) #[... DOCSTRING ...])
-  (defalias (quote NAME) #[... DOCSTRING ...])
-  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
- starting in column zero.
- (quote NAME) may appear as 'NAME as well.
-
- We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
- When we find that, we save it for the following defining-form,
- and we use that instead of reading a doc string within that defining-form.
-
- For defvar, defconst, and fset we skip to the docstring with a kludgy
- formatting convention: all docstrings must appear on the same line as the
- initial open-paren (the one in column zero) and must contain a backslash
- and a newline immediately after the initial double-quote.  No newlines
- must appear between the beginning of the form and the first double-quote.
- For defun, defmacro, and autoload, we know how to skip over the
- arglist, but the doc string must still have a backslash and newline
- immediately after the double quote.
- The only source files that must follow this convention are preloaded
- uncompiled ones like loaddefs.el; aside from that, it is always the .elc
- file that we should look at, and they are no problem because byte-compiler
- output follows this convention.
- The NAME and DOCSTRING are output.
- NAME is preceded by `F' for a function or `V' for a variable.
- An entry is output only if DOCSTRING has \ newline just after the opening ".
- */
-
-static void
-skip_white (FILE *infile)
-{
-  int c;
-  do
-    c = getc (infile);
-  while (c_isspace (c));
-
-  ungetc (c, infile);
-}
-
-static void
-read_lisp_symbol (FILE *infile, char *buffer)
-{
-  int c;
-  char *fillp = buffer;
-
-  skip_white (infile);
-  while (true)
-    {
-      c = getc (infile);
-      if (c == '\\')
-	{
-	  c = getc (infile);
-	  if (c < 0)
-	    return;
-	  *fillp++ = c;
-	}
-      else if (c_isspace (c) || c == '(' || c == ')' || c < 0)
-	{
-	  ungetc (c, infile);
-	  *fillp = 0;
-	  break;
-	}
-      else
-	*fillp++ = c;
-    }
-
-  if (! buffer[0])
-    fprintf (stderr, "## expected a symbol, got '%c'\n", c);
-
-  skip_white (infile);
-}
-
-static bool
-search_lisp_doc_at_eol (FILE *infile)
-{
-  int c = 0, c1 = 0, c2 = 0;
-
-  /* Skip until the end of line; remember two previous chars.  */
-  while (c != '\n' && c != '\r' && c != EOF)
-    {
-      c2 = c1;
-      c1 = c;
-      c = getc (infile);
-    }
-
-  /* If two previous characters were " and \,
-     this is a doc string.  Otherwise, there is none.  */
-  if (c2 != '"' || c1 != '\\')
-    {
-#ifdef DEBUG
-      fprintf (stderr, "## non-docstring found\n");
-#endif
-      ungetc (c, infile);
-      return false;
-    }
-  return true;
-}
-
-#define DEF_ELISP_FILE(fn)  { #fn, sizeof(#fn) - 1 }
-
-static void
-scan_lisp_file (const char *filename, const char *mode)
-{
-  FILE *infile;
-  int c;
-  char *saved_string = 0;
-  /* These are the only files that are loaded uncompiled, and must
-     follow the conventions of the doc strings expected by this
-     function.  These conventions are automatically followed by the
-     byte compiler when it produces the .elc files.  */
-  static struct {
-    const char *fn;
-    int fl;
-  } const uncompiled[] = {
-    DEF_ELISP_FILE (loaddefs.el),
-    DEF_ELISP_FILE (loadup.el),
-    DEF_ELISP_FILE (charprop.el),
-    DEF_ELISP_FILE (cp51932.el),
-    DEF_ELISP_FILE (eucjp-ms.el)
-  };
-  int i;
-  int flen = strlen (filename);
-
-  if (generate_globals)
-    fatal ("scanning lisp file when -g specified");
-  if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
-    {
-      bool match = false;
-      for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
-	{
-	  if (uncompiled[i].fl <= flen
-	      && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
-	      && (flen == uncompiled[i].fl
-		  || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
-	    {
-	      match = true;
-	      break;
-	    }
-	}
-      if (!match)
-	fatal ("uncompiled lisp file %s is not supported", filename);
-    }
-
-  infile = fopen (filename, mode);
-  if (infile == NULL)
-    {
-      perror (filename);
-      exit (EXIT_FAILURE);
-    }
-
-  c = '\n';
-  while (!feof (infile))
-    {
-      char buffer[BUFSIZ];
-      char type;
-
-      /* If not at end of line, skip till we get to one.  */
-      if (c != '\n' && c != '\r')
-	{
-	  c = getc (infile);
-	  continue;
-	}
-      /* Skip the line break.  */
-      while (c == '\n' || c == '\r')
-	c = getc (infile);
-      /* Detect a dynamic doc string and save it for the next expression.  */
-      if (c == '#')
-	{
-	  c = getc (infile);
-	  if (c == '@')
-	    {
-	      ptrdiff_t length = 0;
-	      ptrdiff_t i;
-
-	      /* Read the length.  */
-	      while ((c = getc (infile),
-		      c_isdigit (c)))
-		{
-		  if (INT_MULTIPLY_WRAPV (length, 10, &length)
-		      || INT_ADD_WRAPV (length, c - '0', &length)
-		      || SIZE_MAX < length)
-		    memory_exhausted ();
-		}
-
-	      if (length <= 1)
-		fatal ("invalid dynamic doc string length");
-
-	      if (c != ' ')
-		fatal ("space not found after dynamic doc string length");
-
-	      /* The next character is a space that is counted in the length
-		 but not part of the doc string.
-		 We already read it, so just ignore it.  */
-	      length--;
-
-	      /* Read in the contents.  */
-	      free (saved_string);
-	      saved_string = xmalloc (length);
-	      for (i = 0; i < length; i++)
-		saved_string[i] = getc (infile);
-	      /* The last character is a ^_.
-		 That is needed in the .elc file
-		 but it is redundant in DOC.  So get rid of it here.  */
-	      saved_string[length - 1] = 0;
-	      /* Skip the line break.  */
-	      while (c == '\n' || c == '\r')
-		c = getc (infile);
-	      /* Skip the following line.  */
-	      while (! (c == '\n' || c == '\r' || c < 0))
-		c = getc (infile);
-	    }
-	  continue;
-	}
-
-      if (c != '(')
-	continue;
-
-      read_lisp_symbol (infile, buffer);
-
-      if (! strcmp (buffer, "defun")
-	  || ! strcmp (buffer, "defmacro")
-	  || ! strcmp (buffer, "defsubst"))
-	{
-	  type = 'F';
-	  read_lisp_symbol (infile, buffer);
-
-	  /* Skip the arguments: either "nil" or a list in parens.  */
-
-	  c = getc (infile);
-	  if (c == 'n') /* nil */
-	    {
-	      if ((c = getc (infile)) != 'i'
-		  || (c = getc (infile)) != 'l')
-		{
-		  fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-			   buffer, filename);
-		  continue;
-		}
-	    }
-	  else if (c != '(')
-	    {
-	      fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-		       buffer, filename);
-	      continue;
-	    }
-	  else
-	    while (! (c == ')' || c < 0))
-	      c = getc (infile);
-	  skip_white (infile);
-
-	  /* If the next three characters aren't `dquote bslash newline'
-	     then we're not reading a docstring.
-	   */
-	  if ((c = getc (infile)) != '"'
-	      || (c = getc (infile)) != '\\'
-	      || ((c = getc (infile)) != '\n' && c != '\r'))
-	    {
-#ifdef DEBUG
-	      fprintf (stderr, "## non-docstring in %s (%s)\n",
-		       buffer, filename);
-#endif
-	      continue;
-	    }
-	}
-
-      /* defcustom can only occur in uncompiled Lisp files.  */
-      else if (! strcmp (buffer, "defvar")
-	       || ! strcmp (buffer, "defconst")
-	       || ! strcmp (buffer, "defcustom"))
-	{
-	  type = 'V';
-	  read_lisp_symbol (infile, buffer);
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "custom-declare-variable")
-	       || ! strcmp (buffer, "defvaralias")
-	       )
-	{
-	  type = 'V';
-
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr,
-			   "## unparsable name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr,
-			   "## unparsable name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in custom-declare-variable in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
-	{
-	  type = 'F';
-
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr, "## unparsable name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr, "## unparsable name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in fset in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-      else if (! strcmp (buffer, "autoload"))
-	{
-	  type = 'F';
-	  c = getc (infile);
-	  if (c == '\'')
-	    read_lisp_symbol (infile, buffer);
-	  else
-	    {
-	      if (c != '(')
-		{
-		  fprintf (stderr, "## unparsable name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      if (strcmp (buffer, "quote"))
-		{
-		  fprintf (stderr, "## unparsable name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	      read_lisp_symbol (infile, buffer);
-	      c = getc (infile);
-	      if (c != ')')
-		{
-		  fprintf (stderr,
-			   "## unparsable quoted name in autoload in %s\n",
-			   filename);
-		  continue;
-		}
-	    }
-	  skip_white (infile);
-	  c = getc (infile);
-	  if (c != '\"')
-	    {
-	      fprintf (stderr, "## autoload of %s unparsable (%s)\n",
-		       buffer, filename);
-	      continue;
-	    }
-	  read_c_string_or_comment (infile, 0, false, 0);
-
-	  if (saved_string == 0)
-	    if (!search_lisp_doc_at_eol (infile))
-	      continue;
-	}
-
-#ifdef DEBUG
-      else if (! strcmp (buffer, "if")
-	       || ! strcmp (buffer, "byte-code"))
-	continue;
-#endif
-
-      else
-	{
-#ifdef DEBUG
-	  fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
-		   buffer, filename);
-#endif
-	  continue;
-	}
-
-      /* At this point, we should either use the previous dynamic doc string in
-	 saved_string or gobble a doc string from the input file.
-	 In the latter case, the opening quote (and leading backslash-newline)
-	 have already been read.  */
-
-      printf ("\037%c%s\n", type, buffer);
-      if (saved_string)
-	{
-	  fputs (saved_string, stdout);
-	  /* Don't use one dynamic doc string twice.  */
-	  free (saved_string);
-	  saved_string = 0;
-	}
-      else
-	read_c_string_or_comment (infile, 1, false, 0);
-    }
-  free (saved_string);
-  if (ferror (infile) || fclose (infile) != 0)
-    fatal ("%s: read error", filename);
-}
-
-
 /* make-docfile.c ends here */
diff --git a/lisp/emacs-lisp/bytecomp.el b/lisp/emacs-lisp/bytecomp.el
index 11107ec0f6..a64af022d4 100644
--- a/lisp/emacs-lisp/bytecomp.el
+++ b/lisp/emacs-lisp/bytecomp.el
@@ -4926,13 +4926,13 @@ byte-compile-file-form-defalias
   ;; if it weren't for the fact that we need to figure out when a defalias
   ;; defines a macro, so as to add it to byte-compile-macro-environment.
   ;;
-  ;; FIXME: we also use this hunk-handler to implement the function's dynamic
-  ;; docstring feature.  We could actually implement it more elegantly in
-  ;; byte-compile-lambda so it applies to all lambdas, but the problem is that
-  ;; the resulting .elc format will not be recognized by make-docfile, so
-  ;; either we stop using DOC for the docstrings of preloaded elc files (at the
-  ;; cost of around 24KB on 32bit hosts, double on 64bit hosts) or we need to
-  ;; build DOC in a more clever way (e.g. handle anonymous elements).
+  ;; FIXME: we also use this hunk-handler to implement the function's
+  ;; dynamic docstring feature (via byte-compile-file-form-defmumble).
+  ;; We should actually implement it (more elegantly) in
+  ;; byte-compile-lambda so it applies to all lambdas.  We did it here
+  ;; so the resulting .elc format was recognizable by make-docfile,
+  ;; but since then we stopped using DOC for the docstrings of
+  ;; preloaded elc files so that obstacle is gone.
   (let ((byte-compile-free-references nil)
         (byte-compile-free-assignments nil))
     (pcase form
diff --git a/lisp/startup.el b/lisp/startup.el
index b79467339b..727432a4cb 100644
--- a/lisp/startup.el
+++ b/lisp/startup.el
@@ -1056,6 +1056,9 @@ startup--load-user-init-file
     (when debug-on-error-should-be-set
       (setq debug-on-error debug-on-error-from-init-file))))
 
+(defvar lisp-directory nil
+  "Directory containing the Lisp files that come with GNU Emacs.")
+
 (defun command-line ()
   "A subroutine of `normal-top-level'.
 Amongst another things, it parses the command-line arguments."
@@ -1087,8 +1090,7 @@ command-line
   (let ((simple-file-name
 	 ;; Look for simple.el or simple.elc and use their directory
 	 ;; as the place where all Lisp files live.
-	 (locate-file "simple" load-path (get-load-suffixes)))
-	lisp-dir)
+	 (locate-file "simple" load-path (get-load-suffixes))))
     ;; Don't abort if simple.el cannot be found, but print a warning.
     ;; Although in most usage we are going to cryptically abort a moment
     ;; later anyway, due to missing required bidi data files (eg bug#13430).
@@ -1104,12 +1106,13 @@ command-line
 	  (unless (file-readable-p lispdir)
 	    (princ (format "Lisp directory %s not readable?" lispdir))
 	    (terpri)))
-      (setq lisp-dir (file-truename (file-name-directory simple-file-name)))
+      (setq lisp-directory
+            (file-truename (file-name-directory simple-file-name)))
       (setq load-history
 	    (mapcar (lambda (elt)
 		      (if (and (stringp (car elt))
 			       (not (file-name-absolute-p (car elt))))
-			  (cons (concat lisp-dir
+			  (cons (concat lisp-directory
 					(car elt))
 				(cdr elt))
 			elt))
diff --git a/src/Makefile.in b/src/Makefile.in
index ea4a7207ff..76e4675c2a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -642,13 +642,11 @@ $(pdmp):
 ## for the first time, this prevents any variation between configurations
 ## in the contents of the DOC file.
 ##
-$(etc)/DOC: lisp.mk $(libsrc)/make-docfile$(EXEEXT) $(doc_obj) $(lisp)
+$(etc)/DOC: $(libsrc)/make-docfile$(EXEEXT) $(doc_obj)
 	$(AM_V_GEN)$(MKDIR_P) $(etc)
 	$(AM_V_at)rm -f $(etc)/DOC
 	$(AM_V_at)$(libsrc)/make-docfile -d $(srcdir) \
 	  $(SOME_MACHINE_OBJECTS) $(doc_obj) > $(etc)/DOC
-	$(AM_V_at)$(libsrc)/make-docfile -a $(etc)/DOC -d $(lispsource) \
-	  $(shortlisp)
 
 $(libsrc)/make-docfile$(EXEEXT) $(libsrc)/make-fingerprint$(EXEEXT): \
   $(lib)/libgnu.a
diff --git a/src/doc.c b/src/doc.c
index 6be023bb93..129d3a517b 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -84,16 +84,19 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   char *from, *to, *name, *p, *p1;
   Lisp_Object file, pos;
   ptrdiff_t count = SPECPDL_INDEX ();
+  Lisp_Object dir;
   USE_SAFE_ALLOCA;
 
   if (FIXNUMP (filepos))
     {
       file = Vdoc_file_name;
+      dir = Vdoc_directory;
       pos = filepos;
     }
   else if (CONSP (filepos))
     {
       file = XCAR (filepos);
+      dir = Fsymbol_value (Qlisp_directory);
       pos = XCDR (filepos);
     }
   else
@@ -101,7 +104,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
 
   EMACS_INT position = eabs (XFIXNUM (pos));
 
-  if (!STRINGP (Vdoc_directory))
+  if (!STRINGP (dir))
     return Qnil;
 
   if (!STRINGP (file))
@@ -113,7 +116,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   Lisp_Object tem = Ffile_name_absolute_p (file);
   file = ENCODE_FILE (file);
   Lisp_Object docdir
-    = NILP (tem) ? ENCODE_FILE (Vdoc_directory) : empty_unibyte_string;
+    = NILP (tem) ? ENCODE_FILE (dir) : empty_unibyte_string;
   ptrdiff_t docdir_sizemax = SBYTES (docdir) + 1;
   if (will_dump_p ())
     docdir_sizemax = max (docdir_sizemax, sizeof sibling_etc);
@@ -703,6 +706,7 @@ See variable `text-quoting-style'.  */)
 void
 syms_of_doc (void)
 {
+  DEFSYM (Qlisp_directory, "lisp-directory");
   DEFSYM (Qsubstitute_command_keys, "substitute-command-keys");
   DEFSYM (Qfunction_documentation, "function-documentation");
   DEFSYM (Qgrave, "grave");
diff --git a/src/lread.c b/src/lread.c
index 4992576414..55b3d473dc 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1545,7 +1545,7 @@ Return t if the file exists and loads successfully.  */)
 	message_with_string ("Loading %s...", file, 1);
     }
 
-  specbind (Qload_file_name, found_eff);
+  specbind (Qload_file_name, hist_file_name);
   specbind (Qload_true_file_name, found);
   specbind (Qinhibit_file_name_operation, Qnil);
   specbind (Qload_in_progress, Qt);
@@ -3224,23 +3224,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 		    Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
 	    }
 
-	  if (COMPILED_DOC_STRING < ASIZE (tmp)
-	      && EQ (AREF (tmp, COMPILED_DOC_STRING), make_fixnum (0)))
-	    {
-	      /* read_list found a docstring like '(#$ . 5521)' and treated it
-		 as 0.  This placeholder 0 would lead to accidental sharing in
-		 purecopy's hash-consing, so replace it with a (hopefully)
-		 unique integer placeholder, which is negative so that it is
-		 not confused with a DOC file offset (the USE_LSB_TAG shift
-		 relies on the fact that VALMASK is one bit narrower than
-		 INTMASK).  Eventually Snarf-documentation should replace the
-		 placeholder with the actual docstring.  */
-	      verify (INTMASK & ~VALMASK);
-	      EMACS_UINT hash = ((XHASH (tmp) >> USE_LSB_TAG)
-				 | (INTMASK - INTMASK / 2));
-	      ASET (tmp, COMPILED_DOC_STRING, make_ufixnum (hash));
-	    }
-
 	  XSETPVECTYPE (vec, PVEC_COMPILED);
 	  return tmp;
 	}
@@ -4208,31 +4191,13 @@ read_list (bool flag, Lisp_Object readcharfun)
 
       /* While building, if the list starts with #$, treat it specially.  */
       if (EQ (elt, Vload_file_name)
-	  && ! NILP (elt)
-	  && !NILP (Vpurify_flag))
+	  && ! NILP (elt))
 	{
-	  if (NILP (Vdoc_file_name))
-	    /* We have not yet called Snarf-documentation, so assume
-	       this file is described in the DOC file
-	       and Snarf-documentation will fill in the right value later.
-	       For now, replace the whole list with 0.  */
-	    doc_reference = 1;
-	  else
-	    /* We have already called Snarf-documentation, so make a relative
-	       file name for this file, so it can be found properly
-	       in the installed Lisp directory.
-	       We don't use Fexpand_file_name because that would make
-	       the directory absolute now.  */
-	    {
-	      AUTO_STRING (dot_dot_lisp, "../lisp/");
-	      elt = concat2 (dot_dot_lisp, Ffile_name_nondirectory (elt));
-	    }
+	  if (!NILP (Vpurify_flag))
+	    doc_reference = 0;
+	  else if (load_force_doc_strings)
+	    doc_reference = 2;
 	}
-      else if (EQ (elt, Vload_file_name)
-	       && ! NILP (elt)
-	       && load_force_doc_strings)
-	doc_reference = 2;
-
       if (ch)
 	{
 	  if (flag > 0)
@@ -4253,8 +4218,6 @@ read_list (bool flag, Lisp_Object readcharfun)
 
 	      if (ch == ')')
 		{
-		  if (doc_reference == 1)
-		    return make_fixnum (0);
 		  if (doc_reference == 2 && FIXNUMP (XCDR (val)))
 		    {
 		      char *saved = NULL;




^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-29  0:15       ` Stefan Monnier
@ 2021-12-29 12:30         ` Johann Klähn
  2021-12-29 23:08           ` Stefan Monnier
  2021-12-29 12:52         ` Eli Zaretskii
  1 sibling, 1 reply; 31+ messages in thread
From: Johann Klähn @ 2021-12-29 12:30 UTC (permalink / raw)
  To: emacs-devel


Stefan Monnier <monnier@iro.umontreal.ca> writes:
> diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c
> index d17c28be90..4f4b135589 100644
> --- a/lib-src/make-docfile.c
> +++ b/lib-src/make-docfile.c
> @@ -20,7 +20,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
>  
>  
>  /* The arguments given to this program are all the C and Lisp source files

Maybe also remove the "Lisp source files" part here?




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-29  0:15       ` Stefan Monnier
  2021-12-29 12:30         ` Johann Klähn
@ 2021-12-29 12:52         ` Eli Zaretskii
  2021-12-29 23:23           ` Stefan Monnier
  1 sibling, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-29 12:52 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel, akrl

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: Andrea Corallo <akrl@sdf.org>,  emacs-devel@gnu.org
> Date: Tue, 28 Dec 2021 19:15:11 -0500
> 
> Seeing how I haven't heard any opposition to the idea, I fixed a few
> loose ends, and I think it's now ready.  See below.
> Any objection?

This seems to do much more than just what you said, even if I include
the obvious cleanups, like unnecessary variables and support code no
longer required.  Are all the changes really necessary/derived, or did
you take the chance to make some additional changes, which should
perhaps be discussed separately?

>    When Emacs starts up, it sets up the value of @code{load-path}
> -in several steps.  First, it initializes @code{load-path} using
> -default locations set when Emacs was compiled.  Normally, this
> -is a directory something like
> +in several steps.  First, it initializes @code{lisp-directory} using
> +default locations set when Emacs was compiled.

You used for lisp-directory the same words as we used for load-path,
but is that the correct description?  Looking at the code that
computes the value of lisp-directory, I don't think so, I think you
can say something much more accurate and explicit about
lisp-directory.

Moreover, the text about load-path is now completely gone, and that is
a net loss, I think.

> +@defvar lisp-directory
> +Name of the directory holding Emacs's bundled Lisp files.

This is not accurate enough, given that it could mean both the place
where Emacs was built (the "bundled" part can be interpreted that
way), the place where *.el and *.elc files are installed when the
built Emacs is being installed, and the place where the *.eln files
are installed.

> +Normally, this is a directory something like
>  @example
>  "/usr/local/share/emacs/@var{version}/lisp"
>  @end example

This should say what @var{version} stands for.

> ++++
> +** New variable 'lisp-directory' holds the directory of Emacs's own Lisp files.

This suffers from the same accuracy problems.

> +(defvar lisp-directory nil
> +  "Directory containing the Lisp files that come with GNU Emacs.")

Likewise.  Actually, "files that come with GNU Emacs" is even worse in
its ambiguity than "bundled".

And why isn't the main part of the change called out in NEWS?  I think
this is something we should announce.

Thanks.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-29 12:30         ` Johann Klähn
@ 2021-12-29 23:08           ` Stefan Monnier
  0 siblings, 0 replies; 31+ messages in thread
From: Stefan Monnier @ 2021-12-29 23:08 UTC (permalink / raw)
  To: Johann Klähn; +Cc: emacs-devel

> Maybe also remove the "Lisp source files" part here?

Indeed, thanks,


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-29 12:52         ` Eli Zaretskii
@ 2021-12-29 23:23           ` Stefan Monnier
  2021-12-30  7:20             ` Eli Zaretskii
  0 siblings, 1 reply; 31+ messages in thread
From: Stefan Monnier @ 2021-12-29 23:23 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: akrl, emacs-devel

>> Seeing how I haven't heard any opposition to the idea, I fixed a few
>> loose ends, and I think it's now ready.  See below.
>> Any objection?
> This seems to do much more than just what you said, even if I include
> the obvious cleanups, like unnecessary variables and support code no
> longer required.  Are all the changes really necessary/derived, or did
> you take the chance to make some additional changes, which should
> perhaps be discussed separately?

I don't think this includes any unrelated change.  I know I have
a tendency to do that even without noticing it, but I tried to be
careful this time.

Some of the needed changes could be done differently (mostly the
changes that revolve around the use of relative file names), I guess,
but it's all either needed or subsequent obvious cleanup.

>>    When Emacs starts up, it sets up the value of @code{load-path}
>> -in several steps.  First, it initializes @code{load-path} using
>> -default locations set when Emacs was compiled.  Normally, this
>> -is a directory something like
>> +in several steps.  First, it initializes @code{lisp-directory} using
>> +default locations set when Emacs was compiled.
> You used for lisp-directory the same words as we used for load-path,
> but is that the correct description?

Good question.  I think it should (as in, any difference is likely
a sign of a bug), tho I haven't looked closely at the code to see if the
code matches this expectation.

> Looking at the code that computes the value of lisp-directory, I don't
> think so, I think you can say something much more accurate and
> explicit about lisp-directory.

Don't know what that would look like.

> Moreover, the text about load-path is now completely gone, and that is
> a net loss, I think.

I don't see it being gone.  But yes, I'm not super happy with the text
I have.  I already rewrote it three times before the version you saw.
I'd appreciate some help with it.

>> +@defvar lisp-directory
>> +Name of the directory holding Emacs's bundled Lisp files.
> This is not accurate enough, given that it could mean both the place
> where Emacs was built (the "bundled" part can be interpreted that
> way), the place where *.el and *.elc files are installed when the
> built Emacs is being installed, and the place where the *.eln files
> are installed.

Hmm.. not sure how to avoid those problems: mentioning what it is not
would seem to muddy the waters even further.

>> +Normally, this is a directory something like
>>  @example
>>  "/usr/local/share/emacs/@var{version}/lisp"
>>  @end example
> This should tell what does @var{version} stand for.

(apparently like the author of that chunk) I don't see why that
would be necessary.

> Likewise.  Actually, "files that come with GNU Emacs" is even worse in
> its ambiguity than "bundled".

Any suggestion for a better wording?

> And why isn't the main part of the change called out in NEWS?
> I think this is something we should announce.

AFAIK it's invisible to the end user, so I think it isn't worth
mentioning there.


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-29 23:23           ` Stefan Monnier
@ 2021-12-30  7:20             ` Eli Zaretskii
  2021-12-31  4:19               ` Stefan Monnier
  0 siblings, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-30  7:20 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel, akrl

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: akrl@sdf.org,  emacs-devel@gnu.org
> Date: Wed, 29 Dec 2021 18:23:32 -0500
> 
> >>    When Emacs starts up, it sets up the value of @code{load-path}
> >> -in several steps.  First, it initializes @code{load-path} using
> >> -default locations set when Emacs was compiled.  Normally, this
> >> -is a directory something like
> >> +in several steps.  First, it initializes @code{lisp-directory} using
> >> +default locations set when Emacs was compiled.
> > You used for lisp-directory the same words as we used for load-path,
> > but is that the correct description?
> 
> Good question.  I think it should (as in, any difference is likely
> a sign of a bug), tho I haven't looked closely at the code to see if the
> code matches this expectation.

load-path is a list, whereas lisp-directory is a single directory.
So we could describe the latter much more accurately.

> > Looking at the code that computes the value of lisp-directory, I don't
> > think so, I think you can say something much more accurate and
> > explicit about lisp-directory.
> 
> Don't know what that would look like.

Some text which says what its value should be, or where it should
point.

> > Moreover, the text about load-path is now completely gone, and that is
> > a net loss, I think.
> 
> I don't see it being gone.  But yes, I'm not super happy with the text
> I have.  I already rewrote it three times before the version you saw.
> I'd appreciate some help with it.

I'm trying to help ;-)

If you need more specific help, please show the text you'd like to
improve and tell why you are unhappy with it, and I will try to help
more.

> >> +@defvar lisp-directory
> >> +Name of the directory holding Emacs's bundled Lisp files.
> > This is not accurate enough, given that it could mean both the place
> > where Emacs was built (the "bundled" part can be interpreted that
> > way), the place where *.el and *.elc files are installed when the
> > built Emacs is being installed, and the place where the *.eln files
> > are installed.
> 
> Hmm.. not sure how to avoid those problems: mentioning what it is not
> would seem to muddy the waters even further.

Why not say that it points to where the *.el and *.elc files are
installed in the Emacs installation tree?

> >> +Normally, this is a directory something like
> >>  @example
> >>  "/usr/local/share/emacs/@var{version}/lisp"
> >>  @end example
> > This should tell what does @var{version} stand for.
> 
> (apparently like the author of that chunk) I don't see why that
> would be necessary.

I beg to disagree.  We always describe every @var meta-syntactic
variable in our docs.  It takes just one short sentence to do that in
this case.

> > Likewise.  Actually, "files that come with GNU Emacs" is even worse in
> > its ambiguity than "bundled".
> 
> Any suggestion for a better wording?

See above: mention the installation tree and the files in that
directory explicitly.

> > And why isn't the main part of the change called out in NEWS?
> > I think this is something we should announce.
> 
> AFAIK it's invisible to the end user, so I think it isn't worth
> mentioning there.

NEWS are not just for users, they are also for Lisp programmers.  We
have specialized sections there for that very reason.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-30  7:20             ` Eli Zaretskii
@ 2021-12-31  4:19               ` Stefan Monnier
  2021-12-31  8:57                 ` Eli Zaretskii
  0 siblings, 1 reply; 31+ messages in thread
From: Stefan Monnier @ 2021-12-31  4:19 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: akrl, emacs-devel

> I'm trying to help ;-)

OK, after re-reading the section a few times and rewriting it a couple
more, I think I got something vaguely acceptable.


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-31  4:19               ` Stefan Monnier
@ 2021-12-31  8:57                 ` Eli Zaretskii
  2021-12-31 16:16                   ` Stefan Monnier
  0 siblings, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-31  8:57 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel, akrl

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: akrl@sdf.org,  emacs-devel@gnu.org
> Date: Thu, 30 Dec 2021 23:19:43 -0500
> 
> > I'm trying to help ;-)
> 
> OK, after re-reading the section a few times and rewriting it a couple
> more, I think I got something vaguely acceptable.

Thanks, I have one question: if EMACSLOADPATH is set in the
environment, does it affect the value of lisp-directory?  Should it?



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-31  8:57                 ` Eli Zaretskii
@ 2021-12-31 16:16                   ` Stefan Monnier
  2021-12-31 18:45                     ` Eli Zaretskii
  0 siblings, 1 reply; 31+ messages in thread
From: Stefan Monnier @ 2021-12-31 16:16 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: akrl, emacs-devel

>> OK, after re-reading the section a few times and rewriting it a couple
>> more, I think I got something vaguely acceptable.
> Thanks, I have one question: if EMACSLOADPATH is set in the
> environment, does it affect the value of lisp-directory?

I don't know: I did not write the code that computes this directory,
because it was already written for the purpose of making absolute the
filenames of preloaded files in `load-history`.

> Should it?

Maybe/probably, but since EMACSLOADPATH is a list of directories I'm not
sure how to make it reliable.  Maybe it should look for some
"well-known" bundled ELisp file in EMACSLOADPATH and derive the value of
`lisp-directory` from it?


        Stefan




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-31 16:16                   ` Stefan Monnier
@ 2021-12-31 18:45                     ` Eli Zaretskii
  0 siblings, 0 replies; 31+ messages in thread
From: Eli Zaretskii @ 2021-12-31 18:45 UTC (permalink / raw)
  To: Stefan Monnier; +Cc: emacs-devel, akrl

> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: akrl@sdf.org,  emacs-devel@gnu.org
> Date: Fri, 31 Dec 2021 11:16:47 -0500
> 
> >> OK, after re-reading the section a few times and rewriting it a couple
> >> more, I think I got something vaguely acceptable.
> > Thanks, I have one question: if EMACSLOADPATH is set in the
> > environment, does it affect the value of lisp-directory?
> 
> I don't know: I did not write the code that computes this directory,
> because it was already written for the purpose of making absolute the
> filenames of preloaded files in `load-history`.

So if EMACSLOADPATH is set, the lisp-directory variable will not be
currently set?

> > Should it?
> 
> Maybe/probably, but since EMACSLOADPATH is a list of directories I'm not
> sure how to make it reliable.  Maybe it should look for some
> "well-known" bundled ELisp file in EMACSLOADPATH an derive the value of
> `lisp-directory` from it?

Yes, I think so.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
                   ` (3 preceding siblings ...)
  2021-12-28 12:44 ` Eli Zaretskii
@ 2022-01-03 13:48 ` Ken Raeburn
  2022-01-03 14:30   ` Eli Zaretskii
  4 siblings, 1 reply; 31+ messages in thread
From: Ken Raeburn @ 2022-01-03 13:48 UTC (permalink / raw)
  To: emacs-devel

On 2021-12-27 20:48, Stefan Monnier wrote:
> The patch below removes from the DOC file the docstrings coming from
> ELisp files.
>
> In numbers (in my tests), this reduces the size of DOC from about 3.2MB
> to about 850kB, and increases the pdmp size by about 53kB or about 0.7%.
>
> The benefits aren't great, obviously, but it does remove some warts in
> the `lread.c` code (into which I bumped in the `scratch/fcr` branch,
> which is why this comes up now).
>
> I have a vague recollection that such a change was mentioned in some
> bugreport-discussion some months ago but I can't remember where.
>
> Comments?


I've only skimmed the actual code changes so far, but I think this is a 
good direction to go.

Stefan Kangas dug up an old bug report of mine, but I think I may also 
have mentioned something like this in our old discussion and 
experimentation of doing away with unexec in favor of a big .elc file, a 
few years back now(!), my idea at the time being to reduce the runtime 
startup work done in a build without unexec.

Another piece I recall looking at, which I don't remember if I brought 
up on the list at the time, was moving the C based doc strings into the 
generated .o files and the executable; in combination with removing the 
elisp doc strings, this would get rid of the DOC file altogether. I 
haven't had much time for Emacs hacking in quite a while, sadly, but if 
there's interest, I can try to find a little time to dig up those old 
changes on the C side and see how complete they were and if they can 
still be adapted in some form or other...

Ken




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2022-01-03 13:48 ` Ken Raeburn
@ 2022-01-03 14:30   ` Eli Zaretskii
  2022-01-07 22:59     ` Ken Raeburn
  0 siblings, 1 reply; 31+ messages in thread
From: Eli Zaretskii @ 2022-01-03 14:30 UTC (permalink / raw)
  To: Ken Raeburn; +Cc: emacs-devel

> Date: Mon, 3 Jan 2022 08:48:03 -0500
> From: Ken Raeburn <raeburn@raeburn.org>
> 
> Another piece I recall looking at, which I don't remember if I brought 
> up on the list at the time, was moving the C based doc strings into the 
> generated .o files and the executable

Did you only think about ELF executables, or did you consider how to
do this with any binary formats we support?



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2022-01-03 14:30   ` Eli Zaretskii
@ 2022-01-07 22:59     ` Ken Raeburn
  2022-01-08  7:08       ` Eli Zaretskii
  0 siblings, 1 reply; 31+ messages in thread
From: Ken Raeburn @ 2022-01-07 22:59 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: emacs-devel

On 2022-01-03 09:30, Eli Zaretskii wrote:
>> Date: Mon, 3 Jan 2022 08:48:03 -0500
>> From: Ken Raeburn <raeburn@raeburn.org>
>>
>> Another piece I recall looking at, which I don't remember if I brought
>> up on the list at the time, was moving the C based doc strings into the
>> generated .o files and the executable
> Did you only think about ELF executables, or did you consider how to
> do this with any binary formats we support?

As I recall, it was a generic approach, putting the strings into char 
arrays linked in at the C level during compilation. For variables, I 
think there was some indirection via an array index, to avoid increasing 
Lisp_Symbol size while also not creating all the doc strings as Lisp 
strings up front. I don't recall if that part got completed; I tackled 
function docs first.

The annoying part was, it adds a generated header per C source file for 
these strings. I was looking at whether it could be done without doing 
so, but make-docfile tacks on the "(fn THIS-ARG THAT-ARG ...)" bit for 
function documentation which can't _exactly_ be done with preprocessor 
hacks; cpp has no "upcase" function, though tweaks to the runtime 
support could work around that. And DEFVAR docs need to be pulled out 
and (as mentioned above) stuffed into an array of strings.

There was an optimization I did, for platforms supporting the "section" 
attribute extension (at least MacOS and the GNU tools on ELF), to group 
together the strings in the object file (and hopefully the executable) 
so that, if the doc strings weren't actually used, none of those pages 
need be paged in from disk, because they aren't intermixed with other 
data (except maybe at the ends of the range). But if that support isn't 
available, the rest should still work fine. And if the non-Lisp doc 
strings amount to less than a megabyte, as I think an earlier email in 
this thread indicated, the memory cost of not doing this apparently 
isn't huge anyway.

Ken




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Not using DOC for ELisp files
  2022-01-07 22:59     ` Ken Raeburn
@ 2022-01-08  7:08       ` Eli Zaretskii
  0 siblings, 0 replies; 31+ messages in thread
From: Eli Zaretskii @ 2022-01-08  7:08 UTC (permalink / raw)
  To: Ken Raeburn; +Cc: emacs-devel

> Date: Fri, 7 Jan 2022 17:59:25 -0500
> Cc: emacs-devel@gnu.org
> From: Ken Raeburn <raeburn@raeburn.org>
> 
> There was an optimization I did, for platforms supporting the "section" 
> attribute extension (at least MacOS and the GNU tools on ELF), to group 
> together the strings in the object file (and hopefully the executable) 
> so that, if the doc strings weren't actually used, none of those pages 
> need be paged in from disk, because they aren't intermixed with other 
> data (except maybe at the ends of the range). But if that support isn't 
> available, the rest should still work fine. And if the non-Lisp doc 
> strings amount to less than a megabyte, as I think an earlier email in 
> this thread indicated, the memory cost of not doing this apparently 
> isn't huge anyway.

Beware: adding new sections to the binary causes problems when the
binary is stripped.



^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2022-01-08  7:08 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-28  1:48 Not using DOC for ELisp files Stefan Monnier
2021-12-28  2:25 ` Po Lu
2021-12-28  3:48   ` Stefan Kangas
2021-12-28  5:39     ` Po Lu
2021-12-28  4:11   ` LdBeth
2021-12-28  5:03     ` Stefan Monnier
2021-12-28  5:38     ` Po Lu
2021-12-28  9:52     ` Phil Sainty
2021-12-28 10:31       ` Po Lu
2021-12-28 12:47         ` Po Lu
2021-12-28  7:10   ` Lars Ingebrigtsen
2021-12-28  3:39 ` Stefan Kangas
2021-12-28  5:10   ` Stefan Monnier
2021-12-28  6:56 ` Lars Ingebrigtsen
2021-12-28 12:44 ` Eli Zaretskii
2021-12-28 17:14   ` Stefan Monnier
2021-12-28 18:17     ` Eli Zaretskii
2021-12-29  0:15       ` Stefan Monnier
2021-12-29 12:30         ` Johann Klähn
2021-12-29 23:08           ` Stefan Monnier
2021-12-29 12:52         ` Eli Zaretskii
2021-12-29 23:23           ` Stefan Monnier
2021-12-30  7:20             ` Eli Zaretskii
2021-12-31  4:19               ` Stefan Monnier
2021-12-31  8:57                 ` Eli Zaretskii
2021-12-31 16:16                   ` Stefan Monnier
2021-12-31 18:45                     ` Eli Zaretskii
2022-01-03 13:48 ` Ken Raeburn
2022-01-03 14:30   ` Eli Zaretskii
2022-01-07 22:59     ` Ken Raeburn
2022-01-08  7:08       ` Eli Zaretskii

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).