bug#8668: * editfns.c (Fformat): Fix several integer overflow problems.

unofficial mirror of bug-gnu-emacs@gnu.org 
 help / color / mirror / code / Atom feed

* bug#8668: * editfns.c (Fformat): Fix several integer overflow problems.
@ 2011-05-13  3:01 Paul Eggert
       [not found] ` <handler.8668.B.130525572522651.ack@debbugs.gnu.org>
  2011-05-27 20:40 ` bug#8668: fixes committed to trunk Paul Eggert
  0 siblings, 2 replies; 5+ messages in thread
From: Paul Eggert @ 2011-05-13  3:01 UTC (permalink / raw)
  To: 8668

Here's a patch for several integer overflow problems in (format ...),
including some that cause core dumps.  I plan to install this into
the trunk after some more testing.

* editfns.c (Fformat): Fix several integer overflow problems.
For example, without this change, (format "%2147483648d" 1) dumps
core on x86-64 GNU/Linux.  Use EMACS_INT, not size_t, for sizes,
since we prefer using signed values, and EMACS_INT will be big
enough soon, even on 32-bit hosts.  Also, prefer EMACS_INT to int
for sizes.  Don't assume that pI is either "l" or ""; it might be
"ll" or "I64".  Check for width and precision greater than
INT_MAX, as this can make sprintf go kaflooey.
=== modified file 'src/editfns.c'
--- src/editfns.c	2011-04-14 19:34:42 +0000
+++ src/editfns.c	2011-05-13 02:30:06 +0000
@@ -3583,11 +3583,12 @@
 usage: (format STRING &rest OBJECTS)  */)
   (size_t nargs, register Lisp_Object *args)
 {
-  register size_t n;		/* The number of the next arg to substitute */
-  register size_t total;	/* An estimate of the final length */
+  register EMACS_INT n;		/* The number of the next arg to substitute */
+  register EMACS_INT total;	/* An estimate of the final length */
+  int pIlen = sizeof pI - 1;
   char *buf, *p;
   register char *format, *end, *format_start;
-  int nchars;
+  EMACS_INT nchars;
   /* Nonzero if the output should be a multibyte string,
      which is true if any of the inputs is one.  */
   int multibyte = 0;
@@ -3603,7 +3604,7 @@
      no argument, *will* be assigned to in the case that a `%' and `.'
      occur after the final format specifier.  */
   int *precision = (int *) (alloca ((nargs + 1) * sizeof (int)));
-  int longest_format;
+  EMACS_INT longest_format;
   Lisp_Object val;
   int arg_intervals = 0;
   USE_SAFE_ALLOCA;
@@ -3619,7 +3620,8 @@
      info[0] is unused.  Unused elements have -1 for start.  */
   struct info
   {
-    int start, end, intervals;
+    EMACS_INT start, end;
+    int intervals;
   } *info = 0;

   /* It should not be necessary to GCPRO ARGS, because
@@ -3660,8 +3662,8 @@

   /* Allocate the info and discarded tables.  */
   {
-    size_t nbytes = (nargs+1) * sizeof *info;
-    size_t i;
+    EMACS_INT nbytes = (nargs + 1) * sizeof *info;
+    EMACS_INT i;
     if (!info)
       info = (struct info *) alloca (nbytes);
     memset (info, 0, nbytes);
@@ -3706,25 +3708,33 @@
 		   || * format == ' ' || *format == '+'))
 	  ++format;

+	/* Parse width and precision, limiting them to the range of 'int'
+	   because otherwise the underyling sprintf may go kaflooey.  */
+
 	if (*format >= '0' && *format <= '9')
 	  {
-	    for (field_width = 0; *format >= '0' && *format <= '9'; ++format)
-	      field_width = 10 * field_width + *format - '0';
+	    char *width_end;
+	    unsigned long width = strtoul (format, &width_end, 10);
+	    if (INT_MAX < width)
+	      error ("Format string field width too large");
+	    field_width = width;
+	    format = width_end;
 	  }

 	/* N is not incremented for another few lines below, so refer to
 	   element N+1 (which might be precision[NARGS]). */
 	if (*format == '.')
 	  {
-	    ++format;
-	    for (precision[n+1] = 0; *format >= '0' && *format <= '9'; ++format)
-	      precision[n+1] = 10 * precision[n+1] + *format - '0';
+	    char *prec_end;
+	    unsigned long prec = strtoul (format + 1, &prec_end, 10);
+	    if (INT_MAX < prec)
+	      error ("Format string precision too large");
+	    precision[n + 1] = prec;
+	    format = prec_end;
 	  }

-	/* Extra +1 for 'l' that we may need to insert into the
-	   format.  */
-	if (format - this_format_start + 2 > longest_format)
-	  longest_format = format - this_format_start + 2;
+	if (longest_format < format - this_format_start + pIlen + 1)
+	  longest_format = format - this_format_start + pIlen + 1;

 	if (format == end)
 	  error ("Format string ends in middle of format specifier");
@@ -3975,24 +3985,22 @@
 	    }
 	  else if (INTEGERP (args[n]) || FLOATP (args[n]))
 	    {
-	      int this_nchars;
+	      EMACS_INT this_nchars;
+	      EMACS_INT this_format_len = format - this_format_start;

-	      memcpy (this_format, this_format_start,
-		      format - this_format_start);
-	      this_format[format - this_format_start] = 0;
+	      memcpy (this_format, this_format_start, this_format_len);
+	      this_format[this_format_len] = 0;

 	      if (format[-1] == 'e' || format[-1] == 'f' || format[-1] == 'g')
 		sprintf (p, this_format, XFLOAT_DATA (args[n]));
 	      else
 		{
-		  if (sizeof (EMACS_INT) > sizeof (int)
-		      && format[-1] != 'c')
+		  if (pIlen && format[-1] != 'c')
 		    {
-		      /* Insert 'l' before format spec.  */
-		      this_format[format - this_format_start]
-			= this_format[format - this_format_start - 1];
-		      this_format[format - this_format_start - 1] = 'l';
-		      this_format[format - this_format_start + 1] = 0;
+		      /* Insert pI before format spec.  */
+		      memcpy (&this_format[this_format_len - 1], pI, pIlen);
+		      this_format[this_format_len + pIlen - 1] = format[-1];
+		      this_format[this_format_len + pIlen] = 0;
 		    }

 		  if (INTEGERP (args[n]))
@@ -4089,7 +4097,7 @@
       if (CONSP (props))
 	{
 	  EMACS_INT bytepos = 0, position = 0, translated = 0;
-	  int argn = 1;
+	  EMACS_INT argn = 1;
 	  Lisp_Object list;

 	  /* Adjust the bounds of each text property






^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#8668: * editfns.c (Fformat): Fix several integer overflow problems
       [not found] ` <handler.8668.B.130525572522651.ack@debbugs.gnu.org>
@ 2011-05-16  6:56   ` Paul Eggert
  2011-05-16  9:27     ` Eli Zaretskii
  0 siblings, 1 reply; 5+ messages in thread
From: Paul Eggert @ 2011-05-16  6:56 UTC (permalink / raw)
  To: 8668

Further inspection has found several other problems with Fformat,
having to do with large strings and integer overflow.  Here's a
revised patch to address the problems that I've found.  This
supersedes the earlier patch.  I'll do more testing on this one
but thought I'd give people a heads-up.  This patch assumes other
fixes I've sent in recently.

* editfns.c: Rework Fformat to avoid integer overflow issues.
Don't include <float.h>; no longer needed.
(MAX_10_EXP, CONVERTED_BYTE_SIZE): Remove; no longer needed.
(format_overflow): New function.
(pWIDE, pWIDElen, signed_wide, unsigned_wide): New defns.
(Fformat): Avoid the prepass trying to compute sizes; it was
only approximate and thus did not catch overflow reliably.
Instead, walk through the format just once, formatting and
computing sizes as we go, checking for integer overflow at
every step, and allocating a larger buffer as needed.
Keep track separately whether the format is multibyte.
Keep only the most-recently calculated precision, rather than them all.
Record whether each argument has been converted to string.
Use EMACS_INT, not int, for byte and char and arg counts.
Support field widths and precisions larger than INT_MAX, for
strings, since we format them ourselves.  Fix bug with
strchr succeeding on '\0' when looking for flags.  Be more careful
about formatting out-of-range floating point numbers with int
formats.  (Bug#8668)
=== modified file 'src/editfns.c'
--- src/editfns.c	2011-05-15 17:17:44 +0000
+++ src/editfns.c	2011-05-16 06:40:19 +0000
@@ -57,13 +57,6 @@
 #include "window.h"
 #include "blockinput.h"

-#ifdef STDC_HEADERS
-#include <float.h>
-#define MAX_10_EXP	DBL_MAX_10_EXP
-#else
-#define MAX_10_EXP	310
-#endif
-
 #ifndef NULL
 #define NULL 0
 #endif
@@ -3526,13 +3519,30 @@
 }


-/* Number of bytes that STRING will occupy when put into the result.
-   MULTIBYTE is nonzero if the result should be multibyte.  */
+/* Report the failure of a format conversion of a single number, due to
+   size overflow.  Snprintf's API has an INT_MAX-byte limit.  */
+static void format_overflow (void) NO_RETURN;
+static void
+format_overflow (void)
+{
+  error ("Maximum format conversion size exceeded");
+}

-#define CONVERTED_BYTE_SIZE(MULTIBYTE, STRING)				\
-  (((MULTIBYTE) && ! STRING_MULTIBYTE (STRING))				\
-   ? count_size_as_multibyte (SDATA (STRING), SBYTES (STRING))		\
-   : SBYTES (STRING))
+/* A conversion for printing large decimal integers (possibly including
+   trailing "d" that is ignored), along with its length, signed and
+   unsigned types for printing them, and their bounds.  Use widest integers
+   if available, EMACS_INT otherwise.  */
+#ifdef PRIdMAX
+# define pWIDE PRIdMAX
+enum { pWIDElen = sizeof PRIdMAX - 2 }; /* Don't count trailing "d".  */
+typedef intmax_t signed_wide;
+typedef uintmax_t unsigned_wide;
+#else
+# define pWIDE pI
+enum { pWIDElen = sizeof pI - 1 };
+typedef EMACS_INT signed_wide;
+typedef EMACS_UINT unsigned_wide;
+#endif

 DEFUN ("format", Fformat, Sformat, 1, MANY, 0,
        doc: /* Format a string out of a format-string and arguments.
@@ -3583,11 +3593,14 @@
 usage: (format STRING &rest OBJECTS)  */)
   (size_t nargs, register Lisp_Object *args)
 {
-  register size_t n;		/* The number of the next arg to substitute */
-  register size_t total;	/* An estimate of the final length */
+  register EMACS_INT n;		/* The number of the next arg to substitute */
   char *buf, *p;
+  EMACS_INT bufsize;
+  EMACS_INT max_bufsize = min (MOST_POSITIVE_FIXNUM + 1, SIZE_MAX);
   register char *format, *end, *format_start;
-  int nchars;
+  EMACS_INT formatlen, nchars;
+  /* Nonzero if the format is multibyte.  */
+  int multibyte_format = 0;
   /* Nonzero if the output should be a multibyte string,
      which is true if any of the inputs is one.  */
   int multibyte = 0;
@@ -3596,14 +3609,6 @@
      multibyte character of the previous string.  This flag tells if we
      must consider such a situation or not.  */
   int maybe_combine_byte;
-  char *this_format;
-  /* Precision for each spec, or -1, a flag value meaning no precision
-     was given in that spec.  Element 0, corresponding to the format
-     string itself, will not be used.  Element NARGS, corresponding to
-     no argument, *will* be assigned to in the case that a `%' and `.'
-     occur after the final format specifier.  */
-  int *precision = (int *) (alloca ((nargs + 1) * sizeof (int)));
-  int longest_format;
   Lisp_Object val;
   int arg_intervals = 0;
   USE_SAFE_ALLOCA;
@@ -3611,323 +3616,230 @@
   /* discarded[I] is 1 if byte I of the format
      string was not copied into the output.
      It is 2 if byte I was not the first byte of its character.  */
-  char *discarded = 0;
+  char *discarded;

   /* Each element records, for one argument,
      the start and end bytepos in the output string,
+     whether the argument has been converted to string (e.g., due to "%S"),
      and whether the argument is a string with intervals.
      info[0] is unused.  Unused elements have -1 for start.  */
   struct info
   {
-    int start, end, intervals;
+    EMACS_INT start, end;
+    int converted_to_string;
+    int intervals;
   } *info = 0;

   /* It should not be necessary to GCPRO ARGS, because
      the caller in the interpreter should take care of that.  */

+  CHECK_STRING (args[0]);
+
+  /* Allocate a buffer to contain a copy of the result.  The result size is
+     not known yet, so guess it; it will be increased as needed.  */
+  {
+    enum { min_bufsize = 1000, bufsize_multiplier = 4 };
+    formatlen = SBYTES (args[0]);
+    bufsize = formatlen + 1;
+    if (bufsize <= min_bufsize / bufsize_multiplier)
+      bufsize = min_bufsize;
+    else if (bufsize <= max_bufsize / bufsize_multiplier)
+      bufsize *= bufsize_multiplier;
+    else if (bufsize <= max_bufsize)
+      bufsize = max_bufsize;
+    else
+      memory_full ();
+    SAFE_ALLOCA (buf, char *, bufsize);
+  }
+
+  /* Ensure that BUF is big enough to hold N more bytes.  N must be
+     nonnegative.  */
+# define BUF_ROOM(n)							\
+  do									\
+    {									\
+      EMACS_INT additional = (n);					\
+      EMACS_INT used = p - buf;						\
+      if (bufsize - used < additional)					\
+	{								\
+	  char *newbuf;							\
+	  if (INT_ADD_OVERFLOW (used, additional)			\
+	      || max_bufsize < used + additional)			\
+	    string_overflow ();						\
+	  bufsize = used + additional;					\
+	  bufsize = (bufsize < max_bufsize / 3 * 2			\
+		     ? bufsize + bufsize / 2				\
+		     : max_bufsize);					\
+	  SAFE_ALLOCA (newbuf, char *, bufsize);			\
+	  memcpy (newbuf, buf, used);					\
+	  buf = newbuf;							\
+	  p = buf + used;						\
+	}								\
+    }									\
+  while (0);
+
+  /* Allocate the info and discarded tables, and room for a copy of
+     the format.  The copy is needed because we may truncate a format
+     specifier with '\0', or change "%S" to "%s", or insert pWIDE.  */
+  {
+    EMACS_INT i, extralen = 2 * formatlen + pWIDElen + 1;
+    if (SIZE_MAX < extralen
+	|| (SIZE_MAX - extralen) / sizeof (struct info) <= nargs)
+      memory_full ();
+    SAFE_ALLOCA (info, struct info *, (nargs + 1) * sizeof *info + extralen);
+    discarded = (char *) &info[nargs + 1];
+    format_start = discarded + formatlen + pWIDElen;
+    for (i = 0; i < nargs + 1; i++)
+      {
+	info[i].start = -1;
+	info[i].intervals = info[i].converted_to_string = 0;
+      }
+    memset (discarded, 0, formatlen);
+  }
+
   /* Try to determine whether the result should be multibyte.
      This is not always right; sometimes the result needs to be multibyte
      because of an object that we will pass through prin1,
      and in that case, we won't know it here.  */
-  for (n = 0; n < nargs; n++)
-    {
-      if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
-	multibyte = 1;
-      /* Piggyback on this loop to initialize precision[N]. */
-      precision[n] = -1;
-    }
-  precision[nargs] = -1;
-
-  CHECK_STRING (args[0]);
-  /* We may have to change "%S" to "%s". */
-  args[0] = Fcopy_sequence (args[0]);
-
-  /* GC should never happen here, so abort if it does.  */
-  abort_on_gc++;
+  multibyte_format = STRING_MULTIBYTE (args[0]);
+  multibyte = multibyte_format;
+  for (n = 1; !multibyte && n < nargs; n++)
+    if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
+      multibyte = 1;

   /* If we start out planning a unibyte result,
-     then discover it has to be multibyte, we jump back to retry.
-     That can only happen from the first large while loop below.  */
+     then discover it has to be multibyte, we jump back to retry.  */
  retry:

-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
-  longest_format = 0;
-
-  /* Make room in result for all the non-%-codes in the control string.  */
-  total = 5 + CONVERTED_BYTE_SIZE (multibyte, args[0]) + 1;
-
-  /* Allocate the info and discarded tables.  */
-  {
-    size_t nbytes = (nargs+1) * sizeof *info;
-    size_t i;
-    if (!info)
-      info = (struct info *) alloca (nbytes);
-    memset (info, 0, nbytes);
-    for (i = 0; i < nargs + 1; i++)
-      info[i].start = -1;
-    if (!discarded)
-      SAFE_ALLOCA (discarded, char *, SBYTES (args[0]));
-    memset (discarded, 0, SBYTES (args[0]));
-  }
-
-  /* Add to TOTAL enough space to hold the converted arguments.  */
-
-  n = 0;
-  while (format != end)
-    if (*format++ == '%')
-      {
-	EMACS_INT thissize = 0;
-	EMACS_INT actual_width = 0;
-	char *this_format_start = format - 1;
-	int field_width = 0;
-
-	/* General format specifications look like
-
-	   '%' [flags] [field-width] [precision] format
-
-	   where
-
-	   flags	::= [-+ #0]+
-	   field-width	::= [0-9]+
-	   precision	::= '.' [0-9]*
-
-	   If a field-width is specified, it specifies to which width
-	   the output should be padded with blanks, if the output
-	   string is shorter than field-width.
-
-	   If precision is specified, it specifies the number of
-	   digits to print after the '.' for floats, or the max.
-	   number of chars to print from a string.  */
-
-	while (format != end
-	       && (*format == '-' || *format == '0' || *format == '#'
-		   || * format == ' ' || *format == '+'))
-	  ++format;
-
-	if (*format >= '0' && *format <= '9')
-	  {
-	    for (field_width = 0; *format >= '0' && *format <= '9'; ++format)
-	      field_width = 10 * field_width + *format - '0';
-	  }
-
-	/* N is not incremented for another few lines below, so refer to
-	   element N+1 (which might be precision[NARGS]). */
-	if (*format == '.')
-	  {
-	    ++format;
-	    for (precision[n+1] = 0; *format >= '0' && *format <= '9'; ++format)
-	      precision[n+1] = 10 * precision[n+1] + *format - '0';
-	  }
-
-	/* Extra +1 for 'l' that we may need to insert into the
-	   format.  */
-	if (format - this_format_start + 2 > longest_format)
-	  longest_format = format - this_format_start + 2;
-
-	if (format == end)
-	  error ("Format string ends in middle of format specifier");
-	if (*format == '%')
-	  format++;
-	else if (++n >= nargs)
-	  error ("Not enough arguments for format string");
-	else if (*format == 'S')
-	  {
-	    /* For `S', prin1 the argument and then treat like a string.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qnil);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    /* If we restart the loop, we should not come here again
-	       because args[n] is now a string and calling
-	       Fprin1_to_string on it produces superflous double
-	       quotes.  So, change "%S" to "%s" now.  */
-	    *format = 's';
-	    goto string;
-	  }
-	else if (SYMBOLP (args[n]))
-	  {
-	    args[n] = SYMBOL_NAME (args[n]);
-	    if (STRING_MULTIBYTE (args[n]) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    goto string;
-	  }
-	else if (STRINGP (args[n]))
-	  {
-	  string:
-	    if (*format != 's' && *format != 'S')
-	      error ("Format specifier doesn't match argument type");
-	    /* In the case (PRECISION[N] > 0), THISSIZE may not need
-	       to be as large as is calculated here.  Easy check for
-	       the case PRECISION = 0. */
-	    thissize = precision[n] ? CONVERTED_BYTE_SIZE (multibyte, args[n]) : 0;
-	    /* The precision also constrains how much of the argument
-	       string will finally appear (Bug#5710). */
-	    actual_width = lisp_string_width (args[n], -1, NULL, NULL);
-	    if (precision[n] != -1)
-	      actual_width = min (actual_width, precision[n]);
-	  }
-	/* Would get MPV otherwise, since Lisp_Int's `point' to low memory.  */
-	else if (INTEGERP (args[n]) && *format != 's')
-	  {
-	    /* The following loop assumes the Lisp type indicates
-	       the proper way to pass the argument.
-	       So make sure we have a flonum if the argument should
-	       be a double.  */
-	    if (*format == 'e' || *format == 'f' || *format == 'g')
-	      args[n] = Ffloat (args[n]);
-	    else
-	      if (*format != 'd' && *format != 'o' && *format != 'x'
-		  && *format != 'i' && *format != 'X' && *format != 'c')
-		error ("Invalid format operation %%%c", *format);
-
-	    thissize = 30 + (precision[n] > 0 ? precision[n] : 0);
-	    if (*format == 'c')
-	      {
-		if (! ASCII_CHAR_P (XINT (args[n]))
-		    /* Note: No one can remember why we have to treat
-		       the character 0 as a multibyte character here.
-		       But, until it causes a real problem, let's
-		       don't change it.  */
-		    || XINT (args[n]) == 0)
-		  {
-		    if (! multibyte)
-		      {
-			multibyte = 1;
-			goto retry;
-		      }
-		    args[n] = Fchar_to_string (args[n]);
-		    thissize = SBYTES (args[n]);
-		  }
-	      }
-	  }
-	else if (FLOATP (args[n]) && *format != 's')
-	  {
-	    if (! (*format == 'e' || *format == 'f' || *format == 'g'))
-	      {
-		if (*format != 'd' && *format != 'o' && *format != 'x'
-		    && *format != 'i' && *format != 'X' && *format != 'c')
-		  error ("Invalid format operation %%%c", *format);
-		/* This fails unnecessarily if args[n] is bigger than
-		   most-positive-fixnum but smaller than MAXINT.
-		   These cases are important because we sometimes use floats
-		   to represent such integer values (typically such values
-		   come from UIDs or PIDs).  */
-		/* args[n] = Ftruncate (args[n], Qnil); */
-	      }
-
-	    /* Note that we're using sprintf to print floats,
-	       so we have to take into account what that function
-	       prints.  */
-	    /* Filter out flag value of -1.  */
-	    thissize = (MAX_10_EXP + 100
-			+ (precision[n] > 0 ? precision[n] : 0));
-	  }
-	else
-	  {
-	    /* Anything but a string, convert to a string using princ.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qt);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    goto string;
-	  }
-
-	thissize += max (0, field_width - actual_width);
-	total += thissize + 4;
-      }
-
-  abort_on_gc--;
-
-  /* Now we can no longer jump to retry.
-     TOTAL and LONGEST_FORMAT are known for certain.  */
-
-  this_format = (char *) alloca (longest_format + 1);
-
-  /* Allocate the space for the result.
-     Note that TOTAL is an overestimate.  */
-  SAFE_ALLOCA (buf, char *, total);
-
   p = buf;
   nchars = 0;
   n = 0;

   /* Scan the format and store result in BUF.  */
-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
+  memcpy (format_start, SSDATA (args[0]), formatlen + 1);
+  format = format_start;
+  end = format + formatlen;
   maybe_combine_byte = 0;
   while (format != end)
     {
       if (*format == '%')
 	{
-	  int minlen;
+	  /* General format specifications look like
+
+	     '%' [flags] [field-width] [precision] format
+
+	     where
+
+	     flags ::= [-+0# ]+
+	     field-width ::= [0-9]+
+	     precision ::= '.' [0-9]*
+
+	     If a field-width is specified, it specifies to which width
+	     the output should be padded with blanks, if the output
+	     string is shorter than field-width.
+
+	     If precision is specified, it specifies the number of
+	     digits to print after the '.' for floats, or the max.
+	     number of chars to print from a string.  */
+
 	  int negative = 0;
-	  char *this_format_start = format;
+	  char *convspec = format; /* The conversion specification.  */
+	  uintmax_t field_width;
+	  uintmax_t precision = UINTMAX_MAX;
+	  char *num_end;
+	  char conversion;

-	  discarded[format - format_start] = 1;
 	  format++;

-	  while (strchr ("-+0# ", *format))
+	  while (*format == '-' || *format == ' ' || *format == '#'
+		 || *format == '+' || *format == '0')
 	    {
 	      if (*format == '-')
 		{
 		  negative = 1;
 		}
-	      discarded[format - format_start] = 1;
 	      ++format;
 	    }

-	  minlen = atoi (format);
-
-	  while ((*format >= '0' && *format <= '9') || *format == '.')
-	    {
-	      discarded[format - format_start] = 1;
-	      format++;
-	    }
-
-	  if (*format++ == '%')
-	    {
-	      *p++ = '%';
-	      nchars++;
-	      continue;
-	    }
+	  field_width = strtoumax (format, &num_end, 10);
+	  if (*num_end == '.')
+	    precision = strtoumax (num_end + 1, &num_end, 10);
+	  format = num_end;
+
+	  if (format == end)
+	    error ("Format string ends in middle of format specifier");
+
+	  memset (&discarded[convspec - format_start], 1, format - convspec);
+	  conversion = *format;
+	  if (conversion == '%')
+	    goto copy_char;
+	  discarded[format - format_start] = 1;
+	  format++;

 	  ++n;
+	  if (! (n < nargs))
+	    error ("Not enough arguments for format string");

-	  discarded[format - format_start - 1] = 1;
 	  info[n].start = nchars;

+	  /* For 'S', prin1 the argument, and then treat like 's'.
+	     For 's', princ any argument that is not a string or
+	     symbol.  But don't do this conversion twice, which might
+	     happen after retrying.  */
+	  if ((conversion == 'S'
+	       || (conversion == 's'
+		   && ! STRINGP (args[n]) && ! SYMBOLP (args[n]))))
+	    {
+	      if (! info[n].converted_to_string)
+		{
+		  Lisp_Object noescape = conversion == 'S' ? Qnil : Qt;
+		  args[n] = Fprin1_to_string (args[n], noescape);
+		  info[n].converted_to_string = 1;
+		  if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		    {
+		      multibyte = 1;
+		      goto retry;
+		    }
+		}
+	      conversion = 's';
+	      goto string;
+	    }
+
+	  if (SYMBOLP (args[n]))
+	    {
+	      args[n] = SYMBOL_NAME (args[n]);
+	      if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		{
+		  multibyte = 1;
+		  goto retry;
+		}
+	      goto string;
+	    }
+
 	  if (STRINGP (args[n]))
+	  string:
 	    {
 	      /* handle case (precision[n] >= 0) */

-	      int width, padding;
+	      EMACS_INT width, padding, outbytes;
 	      EMACS_INT nbytes, start;
 	      EMACS_INT nchars_string;

+	      if (conversion != 's')
+		error ("Format specifier doesn't match argument type");
+	      if (max_bufsize <= field_width
+		  || (precision != UINTMAX_MAX && max_bufsize <= precision))
+		string_overflow ();
+
 	      /* lisp_string_width ignores a precision of 0, but GNU
 		 libc functions print 0 characters when the precision
 		 is 0.  Imitate libc behavior here.  Changing
 		 lisp_string_width is the right thing, and will be
 		 done, but meanwhile we work with it. */

-	      if (precision[n] == 0)
+	      if (precision == 0)
 		width = nchars_string = nbytes = 0;
-	      else if (precision[n] > 0)
-		width = lisp_string_width (args[n], precision[n],
+	      else if (precision != UINTMAX_MAX)
+		width = lisp_string_width (args[n], precision,
 					   &nchars_string, &nbytes);
 	      else
 		{		/* no precision spec given for this argument */
@@ -3936,14 +3848,23 @@
 		  nchars_string = SCHARS (args[n]);
 		}

+	      outbytes = nbytes;
+	      if (outbytes && multibyte && ! STRING_MULTIBYTE (args[n]))
+		outbytes = count_size_as_multibyte (SDATA (args[n]), nbytes);
+
+	      padding = width < field_width ? field_width - width : 0;
+
+	      if (INT_ADD_OVERFLOW (outbytes, padding))
+		string_overflow ();
+	      BUF_ROOM (outbytes + padding);
+
 	      /* If spec requires it, pad on right with spaces.  */
-	      padding = minlen - width;
 	      if (! negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    ++nchars;
-		  }
+		{
+		  memset (p, ' ', padding);
+		  p += padding;
+		  nchars += padding;
+		}

 	      info[n].start = start = nchars;
 	      nchars += nchars_string;
@@ -3961,108 +3882,216 @@

 	      info[n].end = nchars;

-	      if (negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    nchars++;
-		  }
+	      if (negative && 0 < padding)
+		{
+		  memset (p, ' ', padding);
+		  p += padding;
+		  nchars += padding;
+		}

 	      /* If this argument has text properties, record where
 		 in the result string it appears.  */
 	      if (STRING_INTERVALS (args[n]))
 		info[n].intervals = arg_intervals = 1;
 	    }
+	  else if (! (conversion == 'c' || conversion == 'd'
+		      || conversion == 'e' || conversion == 'f'
+		      || conversion == 'g' || conversion == 'i'
+		      || conversion == 'o' || conversion == 'x'
+		      || conversion == 'X'))
+	    error ("Invalid format operation %%%c", conversion);
 	  else if (INTEGERP (args[n]) || FLOATP (args[n]))
 	    {
 	      int this_nchars;

-	      memcpy (this_format, this_format_start,
-		      format - this_format_start);
-	      this_format[format - this_format_start] = 0;
-
-	      if (format[-1] == 'e' || format[-1] == 'f' || format[-1] == 'g')
-		sprintf (p, this_format, XFLOAT_DATA (args[n]));
-	      else
-		{
-		  if (sizeof (EMACS_INT) > sizeof (int)
-		      && format[-1] != 'c')
-		    {
-		      /* Insert 'l' before format spec.  */
-		      this_format[format - this_format_start]
-			= this_format[format - this_format_start - 1];
-		      this_format[format - this_format_start - 1] = 'l';
-		      this_format[format - this_format_start + 1] = 0;
-		    }
-
-		  if (INTEGERP (args[n]))
-		    {
-		      if (format[-1] == 'c')
-			sprintf (p, this_format, (int) XINT (args[n]));
-		      else if (format[-1] == 'd')
-			sprintf (p, this_format, XINT (args[n]));
+	      /* Null-terminate the conversion specification temporarily.  */
+	      char c = *format;
+	      *format = 0;
+
+	      /* Don't allow field width or precision greater than INT_MAX,
+		 as that could make snprintf go kaflooey.  */
+	      if (INT_MAX < field_width
+		  || (INT_MAX < precision && precision < UINTMAX_MAX))
+		format_overflow ();
+
+	      /* Insert pWIDE before an integer conversion specifier.  */
+	      if (pWIDElen
+		  && (conversion == 'd' || conversion == 'i'
+		      || conversion == 'o' || conversion == 'x'
+		      || conversion == 'X'))
+		{
+		  memmove (convspec - pWIDElen, convspec,
+			   format - convspec - 1);
+		  memcpy (format - pWIDElen - 1, pWIDE, pWIDElen);
+		  convspec -= pWIDElen;
+		}
+
+	      /* Append a single value to the output buffer.  */
+
+	      while (1)
+		{
+		  /* Don't pass a buffer size greater than INT_MAX to
+		     snprintf, as this causes snprintf to fail.  */
+		  int prsize = min (INT_MAX, buf + bufsize - p);
+
+		  /* There are four types of conversion: double, unsigned
+		     char (passed as int), wide signed int, and wide
+		     unsigned int.  Treat them separately because the
+		     snprintf ABI is sensitive to which type is passed.  Be
+		     careful about integer overflow, NaNs, infinities, and
+		     conversions; for example, the min and max macros are
+		     not suitable here.  */
+		  if (conversion == 'e' || conversion == 'f'
+		      || conversion == 'g')
+		    {
+		      double x = (INTEGERP (args[n])
+				  ? XINT (args[n])
+				  : XFLOAT_DATA (args[n]));
+		      this_nchars = snprintf (p, prsize, convspec, x);
+		    }
+		  else if (conversion == 'c')
+		    {
+		      unsigned char x;
+		      if (INTEGERP (args[n]))
+			x = XINT (args[n]);
+		      else
+			{
+			  double d = XFLOAT_DATA (args[n]);
+			  if (d < 0)
+			    {
+			      intmax_t i = INTMAX_MIN;
+			      if (i < d)
+				i = d;
+			      x = i;
+			    }
+			  else
+			    {
+			      uintmax_t u = UINTMAX_MAX;
+			      if (d < u)
+				u = d;
+			      x = u;
+			    }
+			}
+		      this_nchars = snprintf (p, prsize, convspec, x);
+		    }
+		  else if (conversion == 'd')
+		    {
+		      /* For float, maybe we should use "%1.0f"
+			 instead so it also works for values outside
+			 the integer range.  */
+		      signed_wide x;
+		      if (INTEGERP (args[n]))
+			x = XINT (args[n]);
+		      else
+			{
+			  double d = XFLOAT_DATA (args[n]);
+			  if (d < 0)
+			    {
+			      x = TYPE_MINIMUM (signed_wide);
+			      if (x < d)
+				x = d;
+			    }
+			  else
+			    {
+			      x = TYPE_MAXIMUM (signed_wide);
+			      if (d < x)
+				x = d;
+			    }
+			}
+		      this_nchars = snprintf (p, prsize, convspec, x);
+		    }
+		  else
+		    {
 		      /* Don't sign-extend for octal or hex printing.  */
+		      unsigned_wide x;
+		      if (INTEGERP (args[n]))
+			x = XUINT (args[n]);
 		      else
-			sprintf (p, this_format, XUINT (args[n]));
+			{
+			  double d = XFLOAT_DATA (args[n]);
+			  if (d < 0)
+			    x = 0;
+			  else
+			    {
+			      x = TYPE_MAXIMUM (unsigned_wide);
+			      if (d < x)
+				x = d;
+			    }
+			}
+		      this_nchars = snprintf (p, prsize, convspec, x);
 		    }
-		  else if (format[-1] == 'c')
-		    sprintf (p, this_format, (int) XFLOAT_DATA (args[n]));
-		  else if (format[-1] == 'd')
-		    /* Maybe we should use "%1.0f" instead so it also works
-		       for values larger than MAXINT.  */
-		    sprintf (p, this_format, (EMACS_INT) XFLOAT_DATA (args[n]));
-		  else
-		    /* Don't sign-extend for octal or hex printing.  */
-		    sprintf (p, this_format, (EMACS_UINT) XFLOAT_DATA (args[n]));
+
+		  if (this_nchars < 0)
+		    format_overflow ();
+		  if (this_nchars <= buf + bufsize - p)
+		    break;
+		  BUF_ROOM (this_nchars);
 		}

+	      *format = c;
+
 	      if (p > buf
 		  && multibyte
 		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
 		  && !CHAR_HEAD_P (*((unsigned char *) p)))
 		maybe_combine_byte = 1;
-	      this_nchars = strlen (p);
+
 	      if (multibyte)
-		p += str_to_multibyte ((unsigned char *) p,
-				       buf + total - 1 - p, this_nchars);
+		{
+		  EMACS_INT outbytes =
+		    parse_str_to_multibyte ((unsigned char *) p, this_nchars);
+		  BUF_ROOM (outbytes);
+		  p += str_to_multibyte ((unsigned char *) p, outbytes,
+					 this_nchars);
+		}
 	      else
 		p += this_nchars;
 	      nchars += this_nchars;
 	      info[n].end = nchars;
 	    }
-
-	}
-      else if (STRING_MULTIBYTE (args[0]))
-	{
-	  /* Copy a whole multibyte character.  */
-	  if (p > buf
-	      && multibyte
-	      && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-	      && !CHAR_HEAD_P (*format))
-	    maybe_combine_byte = 1;
-	  *p++ = *format++;
-	  while (! CHAR_HEAD_P (*format))
-	    {
-	      discarded[format - format_start] = 2;
-	      *p++ = *format++;
-	    }
-	  nchars++;
-	}
-      else if (multibyte)
-	{
-	  /* Convert a single-byte character to multibyte.  */
-	  int len = copy_text ((unsigned char *) format, (unsigned char *) p,
-			       1, 0, 1);
-
+	}
+      else
+      copy_char:
+	{
+	  char *src = format;
+	  int len;
+
+	  if (multibyte_format)
+	    {
+	      /* Copy a whole multibyte character.  */
+	      if (p > buf
+		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
+		  && !CHAR_HEAD_P (*format))
+		maybe_combine_byte = 1;
+
+	      do
+		format++;
+	      while (! CHAR_HEAD_P (*format));
+
+	      len = format - src;
+	      memset (&discarded[src + 1 - format_start], 2, len - 1);
+	    }
+	  else
+	    {
+	      if (multibyte)
+		{
+		  /* Convert a single-byte character to multibyte.  */
+		  len = copy_text ((unsigned char *) format, (unsigned char *) p,
+				   1, 0, 1);
+		}
+	      else
+		len = 1;
+	      format++;
+	    }
+
+	  BUF_ROOM (len);
+	  memcpy (p, src, len);
 	  p += len;
-	  format++;
 	  nchars++;
 	}
-      else
-	*p++ = *format++, nchars++;
     }

-  if (p > buf + total)
+  if (bufsize < p - buf)
     abort ();

   if (maybe_combine_byte)
@@ -4089,7 +4118,7 @@
       if (CONSP (props))
 	{
 	  EMACS_INT bytepos = 0, position = 0, translated = 0;
-	  int argn = 1;
+	  EMACS_INT argn = 1;
 	  Lisp_Object list;

 	  /* Adjust the bounds of each text property






^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#8668: * editfns.c (Fformat): Fix several integer overflow problems
  2011-05-16  6:56   ` Paul Eggert
@ 2011-05-16  9:27     ` Eli Zaretskii
  2011-05-22 20:54       ` Paul Eggert
  0 siblings, 1 reply; 5+ messages in thread
From: Eli Zaretskii @ 2011-05-16  9:27 UTC (permalink / raw)
  To: Paul Eggert; +Cc: 8668

> Date: Sun, 15 May 2011 23:56:59 -0700
> From: Paul Eggert <eggert@cs.ucla.edu>
> 
> Further inspection has found several other problems with Fformat,
> having to do with large strings and integer overflow.  Here's a
> revised patch to address the problems that I've found.  This
> supersedes the earlier patch.  I'll do more testing on this one
> but thought I'd give people a heads-up.  This patch assumes other
> fixes I've sent in recently.

This uses snprintf where we previously used sprintf.  snprintf is less
portable, and not all supported platforms have a C9x compatible
implementation.  The library used on MSDOS doesn't have snprintf at
all, and those used on MS-Windows may return a negative value when the
buffer is too short (depending on the version of the library), which
your patch doesn't expect.

Is it possible to stay with sprintf?  We only use it for numerical
conversions, whose length can be predicted reliably, no?

> +  error ("Maximum format conversion size exceeded");

I wonder if we could come up with a less cryptic error message.  (What
is a "conversion size"?)  Maybe something like

  Format conversion specifies width/precision that are too large.

> +    if (bufsize <= min_bufsize / bufsize_multiplier)
> +      bufsize = min_bufsize;
> +    else if (bufsize <= max_bufsize / bufsize_multiplier)
> +      bufsize *= bufsize_multiplier;
> +    else if (bufsize <= max_bufsize)
> +      bufsize = max_bufsize;
> +    else
> +      memory_full ();

Why "memory_full"?  You didn't yet allocate such a huge buffer, so
Emacs could still have plenty of memory.  Even after the allocation,
Emacs could still have enough memory.  I think just a call to `error',
or maybe to the new string_overflow or format_overflow functions, is
TRT here, because this is simply another case of the "string too
large" problem, you just succeeded to detect it in advance.  You
actually do call string_overflow and format_overflow elsewhere in the
patch.

> +      if (bufsize - used < additional)					\
> +	{								\
> +	  char *newbuf;							\
> +	  if (INT_ADD_OVERFLOW (used, additional)			\
> +	      || max_bufsize < used + additional)			\
> +	    string_overflow ();						\
> +	  bufsize = used + additional;					\
> +	  bufsize = (bufsize < max_bufsize / 3 * 2			\
> +		     ? bufsize + bufsize / 2				\
> +		     : max_bufsize);					\
> +	  SAFE_ALLOCA (newbuf, char *, bufsize);			\
> +	  memcpy (newbuf, buf, used);					\

This wastes memory, because the old buffer is not released (when
SAFE_ALLOCA uses xmalloc).  I'm not sure it is a good idea to use
SAFE_ALLOCA in a loop like that.  Perhaps xmalloc/xrealloc would be
better here, since you are keeping the previous contents of the
buffer?

> +    EMACS_INT i, extralen = 2 * formatlen + pWIDElen + 1;
> +    if (SIZE_MAX < extralen
> +	|| (SIZE_MAX - extralen) / sizeof (struct info) <= nargs)
> +      memory_full ();

I think this memory_full should also be replaced with a call to
`error' or format_overflow (string_overflow doesn't seem to fit here,
IMO), for the same reasons pointed out above.

> +	  char conversion;
> ...
> +	  conversion = *format;

I think this is wrong (and the original code had it also wrong).
`format' could be a multibyte string, so it isn't right to take a
single byte out of it and assume you've got a full character; it could
be the 1st byte of a multibyte sequence.  It would also trip you here:

> +	  else if (! (conversion == 'c' || conversion == 'd'
> +		      || conversion == 'e' || conversion == 'f'
> +		      || conversion == 'g' || conversion == 'i'
> +		      || conversion == 'o' || conversion == 'x'
> +		      || conversion == 'X'))
> +	    error ("Invalid format operation %%%c", conversion);

If `conversion' is the first byte of a multibyte sequence, the error
message will show some random value that has no immediate relation to
the actual character after the `%'.

I think you need to make `conversion' an int, and assign its value
with STRING_CHAR.  Or at the very least do that where you call `error'
with the above error message, so that the error identifies the
problematic character.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#8668: * editfns.c (Fformat): Fix several integer overflow problems
  2011-05-16  9:27     ` Eli Zaretskii
@ 2011-05-22 20:54       ` Paul Eggert
  0 siblings, 0 replies; 5+ messages in thread
From: Paul Eggert @ 2011-05-22 20:54 UTC (permalink / raw)
  To: Eli Zaretskii; +Cc: 8668

Thanks for the review; it improved the patch.
Addressing the review's points one by one:

On 05/16/11 02:27, Eli Zaretskii wrote:
> Is it possible to stay with sprintf?

Yes.  I reworked the patch and the revised patch (below) uses sprintf
instead of snprintf.  The basic idea is to sprintf to a fixed-size
buffer using no width and a small precision, and then insert ' ' and
'0' as needed when copying the formatted buffer from the fixed-size
buffer to the main buffer.

>> +  error ("Maximum format conversion size exceeded");
> 
> I wonder if we could come up with a less cryptic error message.

This issue is moot now, as the revised patch never needs the diagnostic.

>> +    else if (bufsize <= max_bufsize)
>> +      bufsize = max_bufsize;
>> +    else
>> +      memory_full ();
> 
> Why "memory_full"?  You didn't yet allocate such a huge buffer, so
> Emacs could still have plenty of memory.

OK, I turned that one into string_overflow ().

> I'm not sure it is a good idea to use SAFE_ALLOCA in a loop like
> that.  Perhaps xmalloc/xrealloc would be better here, since you are
> keeping the previous contents of the buffer?

Good idea, thanks; the attached patch does this.

>> +    EMACS_INT i, extralen = 2 * formatlen + pWIDElen + 1;
>> +    if (SIZE_MAX < extralen
>> +	|| (SIZE_MAX - extralen) / sizeof (struct info) <= nargs)
>> +      memory_full ();
> 
> I think this memory_full should also be replaced with a call to
> `error' or format_overflow (string_overflow doesn't seem to fit here,
> IMO), for the same reasons pointed out above.

You're right that string_overflow doesn't fit.  However, other parts
of Emacs also invoke memory_full when they can't invoke xmalloc due to
size_t overflow or problems with Emacs's internal tagging of pointers.
The C functions that do this are lisp_malloc, lisp_align_malloc,
grow_menu_items, read_minibuf_noninteractive, and emacs_readlink, and
x_send_scroll_bar_event.  Perhaps all these functions (and Fformat as
well) should be changed to invoke memory_full only when malloc has
actually failed, but that should be a different patch, and for now I'd
like to fix the Fformat bugs without worrying about this other issue.

> I think you need to make `conversion' an int, and assign its value
> with STRING_CHAR.  Or at the very least do that where you call `error'
> with the above error message, so that the error identifies the
> problematic character.

Thanks, I did the latter in the revised patch.

Here's the revised patch:

Rework Fformat to avoid integer overflow issues.
* editfns.c: Include <float.h> unconditionally, as it's everywhere
now (part of C89).  Include <verify.h>.
(MAX_10_EXP, CONVERTED_BYTE_SIZE): Remove; no longer needed.
(pWIDE, pWIDElen, signed_wide, unsigned_wide): New defns.
(Fformat): Avoid the prepass trying to compute sizes; it was only
approximate and thus did not catch overflow reliably.  Instead, walk
through the format just once, formatting and computing sizes as we go,
checking for integer overflow at every step, and allocating a larger
buffer as needed.  Keep track separately whether the format is
multibyte.  Keep only the most-recently calculated precision, rather
than them all.  Record whether each argument has been converted to
string.  Use EMACS_INT, not int, for byte and char and arg counts.
Support field widths and precisions larger than INT_MAX.  Avoid
sprintf's undefined behavior with conversion specifications such as %#d
and %.0c.  Fix bug with strchr succeeding on '\0' when looking for
flags.  Fix bug with (format "%c" 256.0).  Avoid integer overflow when
formatting out-of-range floating point numbers with int
formats. (Bug#8668)
=== modified file 'src/editfns.c'
--- src/editfns.c	2011-05-15 17:17:44 +0000
+++ src/editfns.c	2011-05-22 20:20:04 +0000
@@ -45,9 +45,11 @@
 #endif

 #include <ctype.h>
+#include <float.h>
 #include <limits.h>
 #include <intprops.h>
 #include <strftime.h>
+#include <verify.h>

 #include "intervals.h"
 #include "buffer.h"
@@ -57,13 +59,6 @@
 #include "window.h"
 #include "blockinput.h"

-#ifdef STDC_HEADERS
-#include <float.h>
-#define MAX_10_EXP	DBL_MAX_10_EXP
-#else
-#define MAX_10_EXP	310
-#endif
-
 #ifndef NULL
 #define NULL 0
 #endif
@@ -3525,14 +3520,21 @@
   RETURN_UNGCPRO (string);
 }

-
-/* Number of bytes that STRING will occupy when put into the result.
-   MULTIBYTE is nonzero if the result should be multibyte.  */
-
-#define CONVERTED_BYTE_SIZE(MULTIBYTE, STRING)				\
-  (((MULTIBYTE) && ! STRING_MULTIBYTE (STRING))				\
-   ? count_size_as_multibyte (SDATA (STRING), SBYTES (STRING))		\
-   : SBYTES (STRING))
+/* pWIDE is a conversion for printing large decimal integers (possibly with a
+   trailing "d" that is ignored).  pWIDElen is its length.  signed_wide and
+   unsigned_wide are signed and unsigned types for printing them.  Use widest
+   integers if available so that more floating point values can be converted.  */
+#ifdef PRIdMAX
+# define pWIDE PRIdMAX
+enum { pWIDElen = sizeof PRIdMAX - 2 }; /* Don't count trailing "d".  */
+typedef intmax_t signed_wide;
+typedef uintmax_t unsigned_wide;
+#else
+# define pWIDE pI
+enum { pWIDElen = sizeof pI - 1 };
+typedef EMACS_INT signed_wide;
+typedef EMACS_UINT unsigned_wide;
+#endif

 DEFUN ("format", Fformat, Sformat, 1, MANY, 0,
        doc: /* Format a string out of a format-string and arguments.
@@ -3583,11 +3585,17 @@
 usage: (format STRING &rest OBJECTS)  */)
   (size_t nargs, register Lisp_Object *args)
 {
-  register size_t n;		/* The number of the next arg to substitute */
-  register size_t total;	/* An estimate of the final length */
-  char *buf, *p;
+  EMACS_INT n;		/* The number of the next arg to substitute */
+  char initial_buffer[4000];
+  char *buf = initial_buffer;
+  EMACS_INT bufsize = sizeof initial_buffer;
+  EMACS_INT max_bufsize = min (MOST_POSITIVE_FIXNUM + 1, SIZE_MAX);
+  char *p;
+  Lisp_Object buf_save_value IF_LINT (= {0});
   register char *format, *end, *format_start;
-  int nchars;
+  EMACS_INT formatlen, nchars;
+  /* Nonzero if the format is multibyte.  */
+  int multibyte_format = 0;
   /* Nonzero if the output should be a multibyte string,
      which is true if any of the inputs is one.  */
   int multibyte = 0;
@@ -3596,14 +3604,6 @@
      multibyte character of the previous string.  This flag tells if we
      must consider such a situation or not.  */
   int maybe_combine_byte;
-  char *this_format;
-  /* Precision for each spec, or -1, a flag value meaning no precision
-     was given in that spec.  Element 0, corresponding to the format
-     string itself, will not be used.  Element NARGS, corresponding to
-     no argument, *will* be assigned to in the case that a `%' and `.'
-     occur after the final format specifier.  */
-  int *precision = (int *) (alloca ((nargs + 1) * sizeof (int)));
-  int longest_format;
   Lisp_Object val;
   int arg_intervals = 0;
   USE_SAFE_ALLOCA;
@@ -3611,458 +3611,603 @@
   /* discarded[I] is 1 if byte I of the format
      string was not copied into the output.
      It is 2 if byte I was not the first byte of its character.  */
-  char *discarded = 0;
+  char *discarded;

   /* Each element records, for one argument,
      the start and end bytepos in the output string,
+     whether the argument has been converted to string (e.g., due to "%S"),
      and whether the argument is a string with intervals.
      info[0] is unused.  Unused elements have -1 for start.  */
   struct info
   {
-    int start, end, intervals;
+    EMACS_INT start, end;
+    int converted_to_string;
+    int intervals;
   } *info = 0;

   /* It should not be necessary to GCPRO ARGS, because
      the caller in the interpreter should take care of that.  */

+  CHECK_STRING (args[0]);
+  format_start = SSDATA (args[0]);
+  formatlen = SBYTES (args[0]);
+
+  /* Allocate the info and discarded tables.  */
+  {
+    EMACS_INT i;
+    if ((SIZE_MAX - formatlen) / sizeof (struct info) <= nargs)
+      memory_full ();
+    SAFE_ALLOCA (info, struct info *, (nargs + 1) * sizeof *info + formatlen);
+    discarded = (char *) &info[nargs + 1];
+    for (i = 0; i < nargs + 1; i++)
+      {
+	info[i].start = -1;
+	info[i].intervals = info[i].converted_to_string = 0;
+      }
+    memset (discarded, 0, formatlen);
+  }
+
   /* Try to determine whether the result should be multibyte.
      This is not always right; sometimes the result needs to be multibyte
      because of an object that we will pass through prin1,
      and in that case, we won't know it here.  */
-  for (n = 0; n < nargs; n++)
-    {
-      if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
-	multibyte = 1;
-      /* Piggyback on this loop to initialize precision[N]. */
-      precision[n] = -1;
-    }
-  precision[nargs] = -1;
-
-  CHECK_STRING (args[0]);
-  /* We may have to change "%S" to "%s". */
-  args[0] = Fcopy_sequence (args[0]);
-
-  /* GC should never happen here, so abort if it does.  */
-  abort_on_gc++;
+  multibyte_format = STRING_MULTIBYTE (args[0]);
+  multibyte = multibyte_format;
+  for (n = 1; !multibyte && n < nargs; n++)
+    if (STRINGP (args[n]) && STRING_MULTIBYTE (args[n]))
+      multibyte = 1;

   /* If we start out planning a unibyte result,
-     then discover it has to be multibyte, we jump back to retry.
-     That can only happen from the first large while loop below.  */
+     then discover it has to be multibyte, we jump back to retry.  */
  retry:

-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
-  longest_format = 0;
-
-  /* Make room in result for all the non-%-codes in the control string.  */
-  total = 5 + CONVERTED_BYTE_SIZE (multibyte, args[0]) + 1;
-
-  /* Allocate the info and discarded tables.  */
-  {
-    size_t nbytes = (nargs+1) * sizeof *info;
-    size_t i;
-    if (!info)
-      info = (struct info *) alloca (nbytes);
-    memset (info, 0, nbytes);
-    for (i = 0; i < nargs + 1; i++)
-      info[i].start = -1;
-    if (!discarded)
-      SAFE_ALLOCA (discarded, char *, SBYTES (args[0]));
-    memset (discarded, 0, SBYTES (args[0]));
-  }
-
-  /* Add to TOTAL enough space to hold the converted arguments.  */
-
-  n = 0;
-  while (format != end)
-    if (*format++ == '%')
-      {
-	EMACS_INT thissize = 0;
-	EMACS_INT actual_width = 0;
-	char *this_format_start = format - 1;
-	int field_width = 0;
-
-	/* General format specifications look like
-
-	   '%' [flags] [field-width] [precision] format
-
-	   where
-
-	   flags	::= [-+ #0]+
-	   field-width	::= [0-9]+
-	   precision	::= '.' [0-9]*
-
-	   If a field-width is specified, it specifies to which width
-	   the output should be padded with blanks, if the output
-	   string is shorter than field-width.
-
-	   If precision is specified, it specifies the number of
-	   digits to print after the '.' for floats, or the max.
-	   number of chars to print from a string.  */
-
-	while (format != end
-	       && (*format == '-' || *format == '0' || *format == '#'
-		   || * format == ' ' || *format == '+'))
-	  ++format;
-
-	if (*format >= '0' && *format <= '9')
-	  {
-	    for (field_width = 0; *format >= '0' && *format <= '9'; ++format)
-	      field_width = 10 * field_width + *format - '0';
-	  }
-
-	/* N is not incremented for another few lines below, so refer to
-	   element N+1 (which might be precision[NARGS]). */
-	if (*format == '.')
-	  {
-	    ++format;
-	    for (precision[n+1] = 0; *format >= '0' && *format <= '9'; ++format)
-	      precision[n+1] = 10 * precision[n+1] + *format - '0';
-	  }
-
-	/* Extra +1 for 'l' that we may need to insert into the
-	   format.  */
-	if (format - this_format_start + 2 > longest_format)
-	  longest_format = format - this_format_start + 2;
-
-	if (format == end)
-	  error ("Format string ends in middle of format specifier");
-	if (*format == '%')
-	  format++;
-	else if (++n >= nargs)
-	  error ("Not enough arguments for format string");
-	else if (*format == 'S')
-	  {
-	    /* For `S', prin1 the argument and then treat like a string.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qnil);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    /* If we restart the loop, we should not come here again
-	       because args[n] is now a string and calling
-	       Fprin1_to_string on it produces superflous double
-	       quotes.  So, change "%S" to "%s" now.  */
-	    *format = 's';
-	    goto string;
-	  }
-	else if (SYMBOLP (args[n]))
-	  {
-	    args[n] = SYMBOL_NAME (args[n]);
-	    if (STRING_MULTIBYTE (args[n]) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    goto string;
-	  }
-	else if (STRINGP (args[n]))
-	  {
-	  string:
-	    if (*format != 's' && *format != 'S')
-	      error ("Format specifier doesn't match argument type");
-	    /* In the case (PRECISION[N] > 0), THISSIZE may not need
-	       to be as large as is calculated here.  Easy check for
-	       the case PRECISION = 0. */
-	    thissize = precision[n] ? CONVERTED_BYTE_SIZE (multibyte, args[n]) : 0;
-	    /* The precision also constrains how much of the argument
-	       string will finally appear (Bug#5710). */
-	    actual_width = lisp_string_width (args[n], -1, NULL, NULL);
-	    if (precision[n] != -1)
-	      actual_width = min (actual_width, precision[n]);
-	  }
-	/* Would get MPV otherwise, since Lisp_Int's `point' to low memory.  */
-	else if (INTEGERP (args[n]) && *format != 's')
-	  {
-	    /* The following loop assumes the Lisp type indicates
-	       the proper way to pass the argument.
-	       So make sure we have a flonum if the argument should
-	       be a double.  */
-	    if (*format == 'e' || *format == 'f' || *format == 'g')
-	      args[n] = Ffloat (args[n]);
-	    else
-	      if (*format != 'd' && *format != 'o' && *format != 'x'
-		  && *format != 'i' && *format != 'X' && *format != 'c')
-		error ("Invalid format operation %%%c", *format);
-
-	    thissize = 30 + (precision[n] > 0 ? precision[n] : 0);
-	    if (*format == 'c')
-	      {
-		if (! ASCII_CHAR_P (XINT (args[n]))
-		    /* Note: No one can remember why we have to treat
-		       the character 0 as a multibyte character here.
-		       But, until it causes a real problem, let's
-		       don't change it.  */
-		    || XINT (args[n]) == 0)
-		  {
-		    if (! multibyte)
-		      {
-			multibyte = 1;
-			goto retry;
-		      }
-		    args[n] = Fchar_to_string (args[n]);
-		    thissize = SBYTES (args[n]);
-		  }
-	      }
-	  }
-	else if (FLOATP (args[n]) && *format != 's')
-	  {
-	    if (! (*format == 'e' || *format == 'f' || *format == 'g'))
-	      {
-		if (*format != 'd' && *format != 'o' && *format != 'x'
-		    && *format != 'i' && *format != 'X' && *format != 'c')
-		  error ("Invalid format operation %%%c", *format);
-		/* This fails unnecessarily if args[n] is bigger than
-		   most-positive-fixnum but smaller than MAXINT.
-		   These cases are important because we sometimes use floats
-		   to represent such integer values (typically such values
-		   come from UIDs or PIDs).  */
-		/* args[n] = Ftruncate (args[n], Qnil); */
-	      }
-
-	    /* Note that we're using sprintf to print floats,
-	       so we have to take into account what that function
-	       prints.  */
-	    /* Filter out flag value of -1.  */
-	    thissize = (MAX_10_EXP + 100
-			+ (precision[n] > 0 ? precision[n] : 0));
-	  }
-	else
-	  {
-	    /* Anything but a string, convert to a string using princ.  */
-	    register Lisp_Object tem;
-	    tem = Fprin1_to_string (args[n], Qt);
-	    if (STRING_MULTIBYTE (tem) && ! multibyte)
-	      {
-		multibyte = 1;
-		goto retry;
-	      }
-	    args[n] = tem;
-	    goto string;
-	  }
-
-	thissize += max (0, field_width - actual_width);
-	total += thissize + 4;
-      }
-
-  abort_on_gc--;
-
-  /* Now we can no longer jump to retry.
-     TOTAL and LONGEST_FORMAT are known for certain.  */
-
-  this_format = (char *) alloca (longest_format + 1);
-
-  /* Allocate the space for the result.
-     Note that TOTAL is an overestimate.  */
-  SAFE_ALLOCA (buf, char *, total);
-
   p = buf;
   nchars = 0;
   n = 0;

   /* Scan the format and store result in BUF.  */
-  format = SSDATA (args[0]);
-  format_start = format;
-  end = format + SBYTES (args[0]);
+  format = format_start;
+  end = format + formatlen;
   maybe_combine_byte = 0;
+
   while (format != end)
     {
+      /* The values of N and FORMAT when the loop body is entered.  */
+      EMACS_INT n0 = n;
+      char *format0 = format;
+
+      /* Bytes needed to represent the output of this conversion.  */
+      EMACS_INT convbytes;
+
       if (*format == '%')
 	{
-	  int minlen;
-	  int negative = 0;
-	  char *this_format_start = format;
-
+	  /* General format specifications look like
+
+	     '%' [flags] [field-width] [precision] format
+
+	     where
+
+	     flags ::= [-+0# ]+
+	     field-width ::= [0-9]+
+	     precision ::= '.' [0-9]*
+
+	     If a field-width is specified, it specifies to which width
+	     the output should be padded with blanks, if the output
+	     string is shorter than field-width.
+
+	     If precision is specified, it specifies the number of
+	     digits to print after the '.' for floats, or the max.
+	     number of chars to print from a string.  */
+
+	  int minus_flag = 0;
+	  int  plus_flag = 0;
+	  int space_flag = 0;
+	  int sharp_flag = 0;
+	  int  zero_flag = 0;
+	  EMACS_INT field_width;
+	  int precision_given;
+	  uintmax_t precision = UINTMAX_MAX;
+	  char *num_end;
+	  char conversion;
+
+	  while (1)
+	    {
+	      switch (*++format)
+		{
+		case '-': minus_flag = 1; continue;
+		case '+':  plus_flag = 1; continue;
+		case ' ': space_flag = 1; continue;
+		case '#': sharp_flag = 1; continue;
+		case '0':  zero_flag = 1; continue;
+		}
+	      break;
+	    }
+
+	  /* Ignore flags when sprintf ignores them.  */
+	  space_flag &= ~ plus_flag;
+	  zero_flag &= ~ minus_flag;
+
+	  {
+	    uintmax_t w = strtoumax (format, &num_end, 10);
+	    if (max_bufsize <= w)
+	      string_overflow ();
+	    field_width = w;
+	  }
+	  precision_given = *num_end == '.';
+	  if (precision_given)
+	    precision = strtoumax (num_end + 1, &num_end, 10);
+	  format = num_end;
+
+	  if (format == end)
+	    error ("Format string ends in middle of format specifier");
+
+	  memset (&discarded[format0 - format_start], 1, format - format0);
+	  conversion = *format;
+	  if (conversion == '%')
+	    goto copy_char;
 	  discarded[format - format_start] = 1;
 	  format++;

-	  while (strchr ("-+0# ", *format))
-	    {
-	      if (*format == '-')
-		{
-		  negative = 1;
-		}
-	      discarded[format - format_start] = 1;
-	      ++format;
-	    }
-
-	  minlen = atoi (format);
-
-	  while ((*format >= '0' && *format <= '9') || *format == '.')
-	    {
-	      discarded[format - format_start] = 1;
-	      format++;
-	    }
-
-	  if (*format++ == '%')
-	    {
-	      *p++ = '%';
-	      nchars++;
-	      continue;
-	    }
-
 	  ++n;
-
-	  discarded[format - format_start - 1] = 1;
-	  info[n].start = nchars;
-
-	  if (STRINGP (args[n]))
+	  if (! (n < nargs))
+	    error ("Not enough arguments for format string");
+
+	  /* For 'S', prin1 the argument, and then treat like 's'.
+	     For 's', princ any argument that is not a string or
+	     symbol.  But don't do this conversion twice, which might
+	     happen after retrying.  */
+	  if ((conversion == 'S'
+	       || (conversion == 's'
+		   && ! STRINGP (args[n]) && ! SYMBOLP (args[n]))))
+	    {
+	      if (! info[n].converted_to_string)
+		{
+		  Lisp_Object noescape = conversion == 'S' ? Qnil : Qt;
+		  args[n] = Fprin1_to_string (args[n], noescape);
+		  info[n].converted_to_string = 1;
+		  if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		    {
+		      multibyte = 1;
+		      goto retry;
+		    }
+		}
+	      conversion = 's';
+	    }
+	  else if (conversion == 'c')
+	    {
+	      if (FLOATP (args[n]))
+		{
+		  double d = XFLOAT_DATA (args[n]);
+		  args[n] = make_number (FIXNUM_OVERFLOW_P (d) ? -1 : d);
+		}
+
+	      if (INTEGERP (args[n]) && ! ASCII_CHAR_P (XINT (args[n])))
+		{
+		  if (!multibyte)
+		    {
+		      multibyte = 1;
+		      goto retry;
+		    }
+		  args[n] = Fchar_to_string (args[n]);
+		  info[n].converted_to_string = 1;
+		}
+
+	      if (info[n].converted_to_string)
+		conversion = 's';
+	      zero_flag = 0;
+	    }
+
+	  if (SYMBOLP (args[n]))
+	    {
+	      args[n] = SYMBOL_NAME (args[n]);
+	      if (STRING_MULTIBYTE (args[n]) && ! multibyte)
+		{
+		  multibyte = 1;
+		  goto retry;
+		}
+	    }
+
+	  if (conversion == 's')
 	    {
 	      /* handle case (precision[n] >= 0) */

-	      int width, padding;
-	      EMACS_INT nbytes, start;
+	      EMACS_INT width, padding, nbytes;
 	      EMACS_INT nchars_string;

+	      EMACS_INT prec = -1;
+	      if (precision_given && precision <= TYPE_MAXIMUM (EMACS_INT))
+		prec = precision;
+
 	      /* lisp_string_width ignores a precision of 0, but GNU
 		 libc functions print 0 characters when the precision
 		 is 0.  Imitate libc behavior here.  Changing
 		 lisp_string_width is the right thing, and will be
 		 done, but meanwhile we work with it. */

-	      if (precision[n] == 0)
+	      if (prec == 0)
 		width = nchars_string = nbytes = 0;
-	      else if (precision[n] > 0)
-		width = lisp_string_width (args[n], precision[n],
-					   &nchars_string, &nbytes);
-	      else
-		{		/* no precision spec given for this argument */
-		  width = lisp_string_width (args[n], -1, NULL, NULL);
-		  nbytes = SBYTES (args[n]);
-		  nchars_string = SCHARS (args[n]);
-		}
-
-	      /* If spec requires it, pad on right with spaces.  */
-	      padding = minlen - width;
-	      if (! negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    ++nchars;
-		  }
-
-	      info[n].start = start = nchars;
-	      nchars += nchars_string;
-
-	      if (p > buf
-		  && multibyte
-		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-		  && STRING_MULTIBYTE (args[n])
-		  && !CHAR_HEAD_P (SREF (args[n], 0)))
-		maybe_combine_byte = 1;
-
-	      p += copy_text (SDATA (args[n]), (unsigned char *) p,
-			      nbytes,
-			      STRING_MULTIBYTE (args[n]), multibyte);
-
-	      info[n].end = nchars;
-
-	      if (negative)
-		while (padding-- > 0)
-		  {
-		    *p++ = ' ';
-		    nchars++;
-		  }
-
-	      /* If this argument has text properties, record where
-		 in the result string it appears.  */
-	      if (STRING_INTERVALS (args[n]))
-		info[n].intervals = arg_intervals = 1;
-	    }
-	  else if (INTEGERP (args[n]) || FLOATP (args[n]))
-	    {
-	      int this_nchars;
-
-	      memcpy (this_format, this_format_start,
-		      format - this_format_start);
-	      this_format[format - this_format_start] = 0;
-
-	      if (format[-1] == 'e' || format[-1] == 'f' || format[-1] == 'g')
-		sprintf (p, this_format, XFLOAT_DATA (args[n]));
-	      else
-		{
-		  if (sizeof (EMACS_INT) > sizeof (int)
-		      && format[-1] != 'c')
-		    {
-		      /* Insert 'l' before format spec.  */
-		      this_format[format - this_format_start]
-			= this_format[format - this_format_start - 1];
-		      this_format[format - this_format_start - 1] = 'l';
-		      this_format[format - this_format_start + 1] = 0;
-		    }
-
-		  if (INTEGERP (args[n]))
-		    {
-		      if (format[-1] == 'c')
-			sprintf (p, this_format, (int) XINT (args[n]));
-		      else if (format[-1] == 'd')
-			sprintf (p, this_format, XINT (args[n]));
-		      /* Don't sign-extend for octal or hex printing.  */
-		      else
-			sprintf (p, this_format, XUINT (args[n]));
-		    }
-		  else if (format[-1] == 'c')
-		    sprintf (p, this_format, (int) XFLOAT_DATA (args[n]));
-		  else if (format[-1] == 'd')
-		    /* Maybe we should use "%1.0f" instead so it also works
-		       for values larger than MAXINT.  */
-		    sprintf (p, this_format, (EMACS_INT) XFLOAT_DATA (args[n]));
-		  else
-		    /* Don't sign-extend for octal or hex printing.  */
-		    sprintf (p, this_format, (EMACS_UINT) XFLOAT_DATA (args[n]));
-		}
-
-	      if (p > buf
-		  && multibyte
-		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-		  && !CHAR_HEAD_P (*((unsigned char *) p)))
-		maybe_combine_byte = 1;
-	      this_nchars = strlen (p);
-	      if (multibyte)
-		p += str_to_multibyte ((unsigned char *) p,
-				       buf + total - 1 - p, this_nchars);
-	      else
-		p += this_nchars;
-	      nchars += this_nchars;
-	      info[n].end = nchars;
-	    }
-
-	}
-      else if (STRING_MULTIBYTE (args[0]))
-	{
-	  /* Copy a whole multibyte character.  */
-	  if (p > buf
-	      && multibyte
-	      && !ASCII_BYTE_P (*((unsigned char *) p - 1))
-	      && !CHAR_HEAD_P (*format))
-	    maybe_combine_byte = 1;
-	  *p++ = *format++;
-	  while (! CHAR_HEAD_P (*format))
-	    {
-	      discarded[format - format_start] = 2;
-	      *p++ = *format++;
-	    }
-	  nchars++;
-	}
-      else if (multibyte)
-	{
-	  /* Convert a single-byte character to multibyte.  */
-	  int len = copy_text ((unsigned char *) format, (unsigned char *) p,
-			       1, 0, 1);
-
-	  p += len;
-	  format++;
-	  nchars++;
+	      else
+		{
+		  EMACS_INT nch, nby;
+		  width = lisp_string_width (args[n], prec, &nch, &nby);
+		  if (prec < 0)
+		    {
+		      nchars_string = SCHARS (args[n]);
+		      nbytes = SBYTES (args[n]);
+		    }
+		  else
+		    {
+		      nchars_string = nch;
+		      nbytes = nby;
+		    }
+		}
+
+	      convbytes = nbytes;
+	      if (convbytes && multibyte && ! STRING_MULTIBYTE (args[n]))
+		convbytes = count_size_as_multibyte (SDATA (args[n]), nbytes);
+
+	      padding = width < field_width ? field_width - width : 0;
+
+	      if (max_bufsize - padding <= convbytes)
+		string_overflow ();
+	      convbytes += padding;
+	      if (convbytes <= buf + bufsize - p)
+		{
+		  if (! minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  if (p > buf
+		      && multibyte
+		      && !ASCII_BYTE_P (*((unsigned char *) p - 1))
+		      && STRING_MULTIBYTE (args[n])
+		      && !CHAR_HEAD_P (SREF (args[n], 0)))
+		    maybe_combine_byte = 1;
+
+		  p += copy_text (SDATA (args[n]), (unsigned char *) p,
+				  nbytes,
+				  STRING_MULTIBYTE (args[n]), multibyte);
+
+                  info[n].start = nchars;
+		  nchars += nchars_string;
+		  info[n].end = nchars;
+
+		  if (minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  /* If this argument has text properties, record where
+		     in the result string it appears.  */
+		  if (STRING_INTERVALS (args[n]))
+		    info[n].intervals = arg_intervals = 1;
+
+		  continue;
+		}
+	    }
+	  else if (! (conversion == 'c' || conversion == 'd'
+		      || conversion == 'e' || conversion == 'f'
+		      || conversion == 'g' || conversion == 'i'
+		      || conversion == 'o' || conversion == 'x'
+		      || conversion == 'X'))
+	    error ("Invalid format operation %%%c",
+		   STRING_CHAR ((unsigned char *) format - 1));
+	  else if (! (INTEGERP (args[n]) || FLOATP (args[n])))
+	    error ("Format specifier doesn't match argument type");
+	  else
+	    {
+	      enum
+	      {
+		/* Maximum precision for a %f conversion such that the
+		   trailing output digit might be nonzero.  Any precisions
+		   larger than this will not yield useful information.  */
+		USEFUL_PRECISION_MAX =
+		  ((1 - DBL_MIN_EXP)
+		   * (FLT_RADIX == 2 || FLT_RADIX == 10 ? 1
+		      : FLT_RADIX == 16 ? 4
+		      : -1)),
+
+		/* Maximum number of bytes generated by any format, if
+		   precision is no more than DBL_USEFUL_PRECISION_MAX.
+		   On all practical hosts, %f is the worst case.  */
+		SPRINTF_BUFSIZE =
+		  sizeof "-." + (DBL_MAX_10_EXP + 1) + USEFUL_PRECISION_MAX
+	      };
+	      verify (0 < USEFUL_PRECISION_MAX);
+
+	      int prec;
+	      EMACS_INT padding, sprintf_bytes;
+	      uintmax_t excess_precision, numwidth;
+	      uintmax_t leading_zeros = 0, trailing_zeros = 0;
+
+	      char sprintf_buf[SPRINTF_BUFSIZE];
+
+	      /* Copy of conversion specification, modified somewhat.
+		 At most three flags F can be specified at once.  */
+	      char convspec[sizeof "%FFF.*d" + pWIDElen];
+
+	      /* Avoid undefined behavior in underlying sprintf.  */
+	      if (conversion == 'd' || conversion == 'i')
+		sharp_flag = 0;
+
+	      /* Create the copy of the conversion specification, with
+		 any width and precision removed, with ".*" inserted,
+		 and with pWIDE inserted for integer formats.  */
+	      {
+		char *f = convspec;
+		*f++ = '%';
+		*f = '-'; f += minus_flag;
+		*f = '+'; f +=  plus_flag;
+		*f = ' '; f += space_flag;
+		*f = '#'; f += sharp_flag;
+		*f = '0'; f +=  zero_flag;
+                *f++ = '.';
+                *f++ = '*';
+		if (conversion == 'd' || conversion == 'i'
+		    || conversion == 'o' || conversion == 'x'
+		    || conversion == 'X')
+		  {
+		    memcpy (f, pWIDE, pWIDElen);
+		    f += pWIDElen;
+		    zero_flag &= ~ precision_given;
+		  }
+		*f++ = conversion;
+		*f = '\0';
+	      }
+
+	      prec = -1;
+	      if (precision_given)
+		prec = min (precision, USEFUL_PRECISION_MAX);
+
+	      /* Use sprintf to format this number into sprintf_buf.  Omit
+		 padding and excess precision, though, because sprintf limits
+		 output length to INT_MAX.
+
+		 There are four types of conversion: double, unsigned
+		 char (passed as int), wide signed int, and wide
+		 unsigned int.  Treat them separately because the
+		 sprintf ABI is sensitive to which type is passed.  Be
+		 careful about integer overflow, NaNs, infinities, and
+		 conversions; for example, the min and max macros are
+		 not suitable here.  */
+	      if (conversion == 'e' || conversion == 'f' || conversion == 'g')
+		{
+		  double x = (INTEGERP (args[n])
+			      ? XINT (args[n])
+			      : XFLOAT_DATA (args[n]));
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
+	      else if (conversion == 'c')
+		{
+		  /* Don't use sprintf here, as it might mishandle prec.  */
+		  sprintf_buf[0] = XINT (args[n]);
+		  sprintf_bytes = prec != 0;
+		}
+	      else if (conversion == 'd')
+		{
+		  /* For float, maybe we should use "%1.0f"
+		     instead so it also works for values outside
+		     the integer range.  */
+		  signed_wide x;
+		  if (INTEGERP (args[n]))
+		    x = XINT (args[n]);
+		  else
+		    {
+		      double d = XFLOAT_DATA (args[n]);
+		      if (d < 0)
+			{
+			  x = TYPE_MINIMUM (signed_wide);
+			  if (x < d)
+			    x = d;
+			}
+		      else
+			{
+			  x = TYPE_MAXIMUM (signed_wide);
+			  if (d < x)
+			    x = d;
+			}
+		    }
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
+	      else
+		{
+		  /* Don't sign-extend for octal or hex printing.  */
+		  unsigned_wide x;
+		  if (INTEGERP (args[n]))
+		    x = XUINT (args[n]);
+		  else
+		    {
+		      double d = XFLOAT_DATA (args[n]);
+		      if (d < 0)
+			x = 0;
+		      else
+			{
+			  x = TYPE_MAXIMUM (unsigned_wide);
+			  if (d < x)
+			    x = d;
+			}
+		    }
+		  sprintf_bytes = sprintf (sprintf_buf, convspec, prec, x);
+		}
+
+	      /* Now the length of the formatted item is known, except it omits
+		 padding and excess precision.  Deal with excess precision
+		 first.  This happens only when the format specifies
+		 ridiculously large precision.  */
+	      excess_precision = precision - prec;
+	      if (excess_precision)
+		{
+		  if (conversion == 'e' || conversion == 'f'
+		      || conversion == 'g')
+		    {
+		      if ((conversion == 'g' && ! sharp_flag)
+			  || ! ('0' <= sprintf_buf[sprintf_bytes - 1]
+				&& sprintf_buf[sprintf_bytes - 1] <= '9'))
+			excess_precision = 0;
+		      else
+			{
+			  if (conversion == 'g')
+			    {
+			      char *dot = strchr (sprintf_buf, '.');
+			      if (!dot)
+				excess_precision = 0;
+			    }
+			}
+		      trailing_zeros = excess_precision;
+		    }
+		  else
+		    leading_zeros = excess_precision;
+		}
+
+	      /* Compute the total bytes needed for this item, including
+		 excess precision and padding.  */
+	      numwidth = sprintf_bytes + excess_precision;
+	      padding = numwidth < field_width ? field_width - numwidth : 0;
+	      if (max_bufsize - sprintf_bytes <= excess_precision
+		  || max_bufsize - padding <= numwidth)
+		string_overflow ();
+	      convbytes = numwidth + padding;
+
+	      if (convbytes <= buf + bufsize - p)
+		{
+		  /* Copy the formatted item from sprintf_buf into buf,
+		     inserting padding and excess-precision zeros.  */
+
+                  char *src = sprintf_buf;
+		  char src0 = src[0];
+		  int exponent_bytes = 0;
+		  int signedp = src0 == '-' || src0 == '+' || src0 == ' ';
+		  int significand_bytes;
+		  if (zero_flag && '0' <= src[signedp] && src[signedp] <= '9')
+		    {
+		      leading_zeros += padding;
+		      padding = 0;
+		    }
+
+		  if (excess_precision
+		      && (conversion == 'e' || conversion == 'g'))
+		    {
+		      char *e = strchr (src, 'e');
+		      if (e)
+			exponent_bytes = src + sprintf_bytes - e;
+		    }
+
+		  if (! minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  *p = src0;
+		  src += signedp;
+		  p += signedp;
+		  memset (p, '0', leading_zeros);
+		  p += leading_zeros;
+		  significand_bytes = sprintf_bytes - signedp - exponent_bytes;
+		  memcpy (p, src, significand_bytes);
+                  p += significand_bytes;
+		  src += significand_bytes;
+		  memset (p, '0', trailing_zeros);
+		  p += trailing_zeros;
+		  memcpy (p, src, exponent_bytes);
+		  p += exponent_bytes;
+
+                  info[n].start = nchars;
+		  nchars += leading_zeros + sprintf_bytes + trailing_zeros;
+		  info[n].end = nchars;
+
+		  if (minus_flag)
+		    {
+		      memset (p, ' ', padding);
+		      p += padding;
+		      nchars += padding;
+		    }
+
+		  continue;
+		}
+	    }
 	}
       else
-	*p++ = *format++, nchars++;
+      copy_char:
+	{
+	  /* Copy a single character from format to buf.  */
+
+	  char *src = format;
+	  unsigned char str[MAX_MULTIBYTE_LENGTH];
+
+	  if (multibyte_format)
+	    {
+	      /* Copy a whole multibyte character.  */
+	      if (p > buf
+		  && !ASCII_BYTE_P (*((unsigned char *) p - 1))
+		  && !CHAR_HEAD_P (*format))
+		maybe_combine_byte = 1;
+
+	      do
+		format++;
+	      while (! CHAR_HEAD_P (*format));
+
+	      convbytes = format - format0;
+	      memset (&discarded[format0 + 1 - format_start], 2, convbytes - 1);
+	    }
+	  else
+	    {
+	      unsigned char uc = *format++;
+	      if (! multibyte || ASCII_BYTE_P (uc))
+		convbytes = 1;
+	      else
+		{
+		  int c = BYTE8_TO_CHAR (uc);
+		  convbytes = CHAR_STRING (c, str);
+		  src = (char *) str;
+		}
+	    }
+
+	  if (convbytes <= buf + bufsize - p)
+	    {
+	      memcpy (p, src, convbytes);
+	      p += convbytes;
+	      nchars++;
+	      continue;
+	    }
+	}
+
+      /* There wasn't enough room to store this conversion or single
+	 character.  CONVBYTES says how much room is needed.  Allocate
+	 enough room (and then some) and do it again.  */
+      {
+	EMACS_INT used = p - buf;
+
+	if (max_bufsize - used < convbytes)
+	  string_overflow ();
+	bufsize = used + convbytes;
+	bufsize = bufsize < max_bufsize / 2 ? bufsize * 2 : max_bufsize;
+
+	if (buf == initial_buffer)
+	  {
+	    buf = xmalloc (bufsize);
+	    sa_must_free = 1;
+	    buf_save_value = make_save_value (buf, 0);
+	    record_unwind_protect (safe_alloca_unwind, buf_save_value);
+	    memcpy (buf, initial_buffer, used);
+	  }
+	else
+	  XSAVE_VALUE (buf_save_value)->pointer = buf = xrealloc (buf, bufsize);
+
+	p = buf + used;
+      }
+
+      format = format0;
+      n = n0;
     }

-  if (p > buf + total)
+  if (bufsize < p - buf)
     abort ();

   if (maybe_combine_byte)
@@ -4089,7 +4234,7 @@
       if (CONSP (props))
 	{
 	  EMACS_INT bytepos = 0, position = 0, translated = 0;
-	  int argn = 1;
+	  EMACS_INT argn = 1;
 	  Lisp_Object list;

 	  /* Adjust the bounds of each text property







^ permalink raw reply	[flat|nested] 5+ messages in thread

* bug#8668: fixes committed to trunk
  2011-05-13  3:01 bug#8668: * editfns.c (Fformat): Fix several integer overflow problems Paul Eggert
       [not found] ` <handler.8668.B.130525572522651.ack@debbugs.gnu.org>
@ 2011-05-27 20:40 ` Paul Eggert
  1 sibling, 0 replies; 5+ messages in thread
From: Paul Eggert @ 2011-05-27 20:40 UTC (permalink / raw)
  To: 8722-done, 8719-done, 8668-done

I just committed bzr 104390 to the Emacs trunk.
It merges fixes for bugs 8668, 8719, and 8722 as
previously discussed.

For Bug#8719, although the patch should fix the problems
it may not be the best patch.  Kenichi Handa wrote that
he'll check it.  Kenichi, if there are problems with it,
please feel free to replace it with something better
or to send me email with suggestions and I'll work on
making it better.  Thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-05-27 20:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-05-13  3:01 bug#8668: * editfns.c (Fformat): Fix several integer overflow problems Paul Eggert
     [not found] ` <handler.8668.B.130525572522651.ack@debbugs.gnu.org>
2011-05-16  6:56   ` Paul Eggert
2011-05-16  9:27     ` Eli Zaretskii
2011-05-22 20:54       ` Paul Eggert
2011-05-27 20:40 ` bug#8668: fixes committed to trunk Paul Eggert

Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).