unofficial mirror of emacs-devel@gnu.org 
 help / color / mirror / code / Atom feed
From: Paul Eggert <eggert@cs.ucla.edu>
To: Eli Zaretskii <eliz@gnu.org>
Cc: p.stephani2@gmail.com, emacs-devel@gnu.org
Subject: Re: JSON/YAML/TOML/etc. parsing performance
Date: Wed, 4 Oct 2017 18:48:04 -0700	[thread overview]
Message-ID: <8c922c27-9de0-7d99-6c26-a94a0387c45e@cs.ucla.edu> (raw)
In-Reply-To: <21b0ba97-ed49-43ae-e86f-63fba762353a@cs.ucla.edu>

[-- Attachment #1: Type: text/plain, Size: 765 bytes --]

On 10/04/2017 02:24 PM, Paul Eggert wrote:
> The code snippets I've seen so far in this thread are not enough 
> context to judge whether an exception would be helpful in this case.

Sorry, I looked only at this month's part of the thead. When I went back 
to last month's I found this:

http://lists.gnu.org/archive/html/emacs-devel/2017-09/msg01080.html

for which I suggest the attached as a simplification. One idea here is 
that there is no need for eassert (E) unless there's a genuine doubt 
that E will be true (in some cases the removed eassert (E) calls were 
ineffective anyway, due to preceding eassume (E) calls). The patch cuts 
down the number of integer overflow checks to six in json.c, if I'm 
counting correctly, and that should be good enough.


[-- Attachment #2: 0001-Minor-JSON-cleanups-mostly-for-overflow.patch --]
[-- Type: text/x-patch, Size: 8932 bytes --]

From 9a5c6a9eab6cb5ddbbb1f1c0940a561f6895b478 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 4 Oct 2017 18:38:07 -0700
Subject: [PATCH] Minor JSON cleanups, mostly for overflow

* src/json.c (json_has_prefix): Simplify via strncmp.
(json_has_suffix): Indent a la GNU.
(json_make_string): Take size_t, not ptrdiff_t, and check its
range.  Simplify callers accordingly.
(json_out_of_memory, json_to_lisp): Just call memory_full.
(check_string_without_embedded_nulls): Use strlen, not memchr;
it is typically faster.
(lisp_to_json_toplevel_1, json_to_lisp): Do not bother with
_GL_ARG_NONNULL on static functions; it is not worth the trouble.
(lisp_to_json_toplevel_1, lisp_to_json, json_read_buffer_callback)
(define_error):
Remove useless checks.
(json_insert): Just call buffer_overflow.
(json_to_lisp): Just signal overflow error, to be consistent with
other signalers.  Use allocate_vector instead of Fmake_vector,
to avoid need for initializing vector twice.  Use make_hash_table
instead of Fmake_hash_table, as it is a bit simpler.
---
 src/json.c | 93 +++++++++++++++++++++-----------------------------------------
 1 file changed, 32 insertions(+), 61 deletions(-)

diff --git a/src/json.c b/src/json.c
index 79be55bd54..5138315da1 100644
--- a/src/json.c
+++ b/src/json.c
@@ -31,9 +31,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 static bool
 json_has_prefix (const char *string, const char *prefix)
 {
-  size_t string_len = strlen (string);
-  size_t prefix_len = strlen (prefix);
-  return string_len >= prefix_len && memcmp (string, prefix, prefix_len) == 0;
+  return strncmp (string, prefix, strlen (prefix)) == 0;
 }
 
 static bool
@@ -41,22 +39,23 @@ json_has_suffix (const char *string, const char *suffix)
 {
   size_t string_len = strlen (string);
   size_t suffix_len = strlen (suffix);
-  return string_len >= suffix_len
-    && memcmp (string + string_len - suffix_len, suffix, suffix_len) == 0;
+  return (string_len >= suffix_len
+          && (memcmp (string + string_len - suffix_len, suffix, suffix_len)
+              == 0));
 }
 
 static Lisp_Object
-json_make_string (const char *data, ptrdiff_t size)
+json_make_string (const char *data, size_t size)
 {
+  if (PTRDIFF_MAX < size)
+    string_overflow ();
   return make_specified_string (data, -1, size, true);
 }
 
 static Lisp_Object
 json_build_string (const char *data)
 {
-  size_t size = strlen (data);
-  eassert (size < PTRDIFF_MAX);
-  return json_make_string (data, size);
+  return json_make_string (data, strlen (data));
 }
 
 static Lisp_Object
@@ -68,7 +67,7 @@ json_encode (Lisp_Object string)
 static _Noreturn void
 json_out_of_memory (void)
 {
-  xsignal0 (Qjson_out_of_memory);
+  memory_full (SIZE_MAX);
 }
 
 static _Noreturn void
@@ -97,7 +96,7 @@ static void
 check_string_without_embedded_nulls (Lisp_Object object)
 {
   CHECK_STRING (object);
-  CHECK_TYPE (memchr (SDATA (object), '\0', SBYTES (object)) == NULL,
+  CHECK_TYPE (strlen (SSDATA (object)) == SBYTES (object),
               Qstring_without_embedded_nulls_p, object);
 }
 
@@ -114,15 +113,12 @@ static json_t *lisp_to_json (Lisp_Object) ATTRIBUTE_WARN_UNUSED_RESULT;
 /* This returns Lisp_Object so we can use unbind_to.  The return value
    is always nil.  */
 
-static _GL_ARG_NONNULL ((2)) Lisp_Object
+static Lisp_Object
 lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json)
 {
   if (VECTORP (lisp))
     {
       ptrdiff_t size = ASIZE (lisp);
-      eassert (size >= 0);
-      if (size > SIZE_MAX)
-        xsignal1 (Qoverflow_error, build_string ("vector is too long"));
       *json = json_check (json_array ());
       ptrdiff_t count = SPECPDL_INDEX ();
       record_unwind_protect_ptr (json_release_object, json);
@@ -194,9 +190,6 @@ lisp_to_json (Lisp_Object lisp)
     {
       Lisp_Object encoded = json_encode (lisp);
       ptrdiff_t size = SBYTES (encoded);
-      eassert (size >= 0);
-      if (size > SIZE_MAX)
-        xsignal1 (Qoverflow_error, build_string ("string is too long"));
       return json_check (json_stringn (SSDATA (encoded), size));
     }
 
@@ -239,7 +232,7 @@ json_insert (void *data)
 {
   const struct json_buffer_and_size *buffer_and_size = data;
   if (buffer_and_size->size > PTRDIFF_MAX)
-    xsignal1 (Qoverflow_error, build_string ("buffer too large"));
+    buffer_overflow ();
   insert (buffer_and_size->buffer, buffer_and_size->size);
   return Qnil;
 }
@@ -289,7 +282,7 @@ OBJECT.  */)
   return unbind_to (count, Qnil);
 }
 
-static _GL_ARG_NONNULL ((1)) Lisp_Object
+static Lisp_Object
 json_to_lisp (json_t *json)
 {
   switch (json_typeof (json))
@@ -304,32 +297,26 @@ json_to_lisp (json_t *json)
       {
         json_int_t value = json_integer_value (json);
         if (FIXNUM_OVERFLOW_P (value))
-          xsignal1 (Qoverflow_error,
-                    build_string ("JSON integer is too large"));
+          xsignal0 (Qoverflow_error);
         return make_number (value);
       }
     case JSON_REAL:
       return make_float (json_real_value (json));
     case JSON_STRING:
-      {
-        size_t size = json_string_length (json);
-        if (FIXNUM_OVERFLOW_P (size))
-          xsignal1 (Qoverflow_error, build_string ("JSON string is too long"));
-        return json_make_string (json_string_value (json), size);
-      }
+      return json_make_string (json_string_value (json),
+                               json_string_length (json));
     case JSON_ARRAY:
       {
         if (++lisp_eval_depth > max_lisp_eval_depth)
           xsignal0 (Qjson_object_too_deep);
         size_t size = json_array_size (json);
         if (FIXNUM_OVERFLOW_P (size))
-          xsignal1 (Qoverflow_error, build_string ("JSON array is too long"));
-        Lisp_Object result = Fmake_vector (make_natnum (size), Qunbound);
+          memory_full (size);
+        struct Lisp_Vector *v = allocate_vector (size);
         for (ptrdiff_t i = 0; i < size; ++i)
-          ASET (result, i,
-                json_to_lisp (json_array_get (json, i)));
+	  v->contents[i] = json_to_lisp (json_array_get (json, i));
         --lisp_eval_depth;
-        return result;
+        return make_lisp_ptr (v, Lisp_Vectorlike);
       }
     case JSON_OBJECT:
       {
@@ -337,10 +324,10 @@ json_to_lisp (json_t *json)
           xsignal0 (Qjson_object_too_deep);
         size_t size = json_object_size (json);
         if (FIXNUM_OVERFLOW_P (size))
-          xsignal1 (Qoverflow_error,
-                    build_string ("JSON object has too many elements"));
-        Lisp_Object result = CALLN (Fmake_hash_table, QCtest, Qequal,
-                                    QCsize, make_natnum (size));
+          memory_full (size);
+        Lisp_Object result
+          = make_hash_table (hashtest_equal, size, DEFAULT_REHASH_SIZE,
+                             DEFAULT_REHASH_THRESHOLD, Qnil, false);
         struct Lisp_Hash_Table *h = XHASH_TABLE (result);
         const char *key_str;
         json_t *value;
@@ -399,23 +386,12 @@ json_read_buffer_callback (void *buffer, size_t buflen, void *data)
   /* First, parse from point to the gap or the end of the accessible
      portion, whatever is closer.  */
   ptrdiff_t point = d->point;
-  ptrdiff_t end;
-  {
-    bool overflow = INT_ADD_WRAPV (BUFFER_CEILING_OF (point), 1, &end);
-    eassert (!overflow);
-  }
-  size_t count;
-  {
-    bool overflow = INT_SUBTRACT_WRAPV (end, point, &count);
-    eassert (!overflow);
-  }
+  ptrdiff_t end = BUFFER_CEILING_OF (point) + 1;
+  ptrdiff_t count = end - point;
   if (buflen < count)
     count = buflen;
   memcpy (buffer, BYTE_POS_ADDR (point), count);
-  {
-    bool overflow = INT_ADD_WRAPV (d->point, count, &d->point);
-    eassert (!overflow);
-  }
+  d->point += count;
   return count;
 }
 
@@ -444,14 +420,11 @@ not moved.  */)
   /* Convert and then move point only if everything succeeded.  */
   Lisp_Object lisp = json_to_lisp (object);
 
-  {
-    /* Adjust point by how much we just read.  Do this here because
-       tokener->char_offset becomes incorrect below.  */
-    bool overflow = INT_ADD_WRAPV (point, error.position, &point);
-    eassert (!overflow);
-    eassert (point <= ZV_BYTE);
-    SET_PT_BOTH (BYTE_TO_CHAR (point), point);
-  }
+  /* Adjust point by how much we just read.  Do this here because
+     tokener->char_offset becomes incorrect below.  */
+  eassert (0 <= error.position && error.position <= ZV_BYTE - point);
+  point += error.position;
+  SET_PT_BOTH (BYTE_TO_CHAR (point), point);
 
   return unbind_to (count, lisp);
 }
@@ -462,8 +435,6 @@ not moved.  */)
 static void
 define_error (Lisp_Object name, const char *message, Lisp_Object parent)
 {
-  eassert (SYMBOLP (name));
-  eassert (SYMBOLP (parent));
   Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
   eassert (CONSP (parent_conditions));
   eassert (!NILP (Fmemq (parent, parent_conditions)));
-- 
2.13.6


  reply	other threads:[~2017-10-05  1:48 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-16 15:54 JSON/YAML/TOML/etc. parsing performance Ted Zlatanov
2017-09-16 16:02 ` Mark Oteiza
2017-09-17  0:02   ` Richard Stallman
2017-09-17  3:13     ` Mark Oteiza
2017-09-18  0:00       ` Richard Stallman
2017-09-17  0:02 ` Richard Stallman
2017-09-18 13:46   ` Ted Zlatanov
2017-09-17 18:46 ` Philipp Stephani
2017-09-17 19:05   ` Eli Zaretskii
2017-09-17 20:27     ` Philipp Stephani
2017-09-17 22:41       ` Mark Oteiza
2017-09-18 13:53       ` Ted Zlatanov
2017-09-17 21:17   ` Speed of Elisp (was: JSON/YAML/TOML/etc. parsing performance) Stefan Monnier
2017-09-18 13:26   ` JSON/YAML/TOML/etc. parsing performance Philipp Stephani
2017-09-18 13:58     ` Mark Oteiza
2017-09-18 14:14       ` Philipp Stephani
2017-09-18 14:28         ` Mark Oteiza
2017-09-18 14:36           ` Philipp Stephani
2017-09-18 15:02             ` Eli Zaretskii
2017-09-18 16:14               ` Philipp Stephani
2017-09-18 17:33                 ` Eli Zaretskii
2017-09-18 19:57                 ` Thien-Thi Nguyen
2017-09-18 14:57     ` Eli Zaretskii
2017-09-18 15:07       ` Mark Oteiza
2017-09-18 15:51         ` Eli Zaretskii
2017-09-18 16:22           ` Philipp Stephani
2017-09-18 18:08             ` Eli Zaretskii
2017-09-19 19:32               ` Richard Stallman
2017-09-18 17:26           ` Glenn Morris
2017-09-18 18:16             ` Eli Zaretskii
2017-09-18 16:08       ` Philipp Stephani
2017-09-19  8:18     ` Philipp Stephani
2017-09-19 19:09       ` Eli Zaretskii
2017-09-28 21:19         ` Philipp Stephani
2017-09-28 21:27           ` Stefan Monnier
2017-09-29 19:55           ` Eli Zaretskii
2017-09-30 22:02             ` Philipp Stephani
2017-10-01 18:06               ` Eli Zaretskii
2017-10-03 12:26                 ` Philipp Stephani
2017-10-03 15:31                   ` Eli Zaretskii
2017-10-03 15:52                     ` Philipp Stephani
2017-10-03 16:26                       ` Eli Zaretskii
2017-10-03 17:10                         ` Eli Zaretskii
2017-10-03 18:37                           ` Philipp Stephani
2017-10-03 20:52                   ` Paul Eggert
2017-10-04  5:33                     ` Eli Zaretskii
2017-10-04  6:41                       ` Paul Eggert
2017-10-04  8:03                         ` Eli Zaretskii
2017-10-04 17:51                           ` Paul Eggert
2017-10-04 19:38                             ` Eli Zaretskii
2017-10-04 21:24                               ` Paul Eggert
2017-10-05  1:48                                 ` Paul Eggert [this message]
2017-10-05  7:14                                   ` Eli Zaretskii
2017-10-08 22:52                                   ` Philipp Stephani
2017-10-09  5:54                                     ` Paul Eggert
2017-10-29 20:48                                       ` Philipp Stephani
2017-10-09  6:38                                     ` Eli Zaretskii
2017-10-05  7:12                                 ` Eli Zaretskii
2017-10-06  1:58                                   ` Paul Eggert
2017-10-06  7:40                                     ` Eli Zaretskii
2017-10-06 19:36                                       ` Paul Eggert
2017-10-06 21:03                                         ` Eli Zaretskii
2017-10-08 23:09                                     ` Philipp Stephani
2017-10-09  6:19                                       ` Paul Eggert
2017-10-29 20:48                                         ` Philipp Stephani
2017-10-29 22:49                                           ` Philipp Stephani
2017-12-09 23:05                                             ` Philipp Stephani
2017-12-10  7:08                                               ` Eli Zaretskii
2017-12-10 13:26                                                 ` Philipp Stephani
2017-12-10 13:32                                                   ` Ted Zlatanov
2017-10-08 23:04                                   ` Philipp Stephani
2017-10-09  6:47                                     ` Eli Zaretskii
2017-10-08 17:58                     ` Philipp Stephani
2017-10-08 18:42                       ` Eli Zaretskii
2017-10-08 23:14                         ` Philipp Stephani
2017-10-09  6:53                           ` Eli Zaretskii
2017-10-29 20:41                             ` Philipp Stephani
2017-10-09  6:22                       ` Paul Eggert
2017-10-01 18:38               ` Eli Zaretskii
2017-10-03 12:12                 ` Philipp Stephani
2017-10-03 14:54                   ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/emacs/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8c922c27-9de0-7d99-6c26-a94a0387c45e@cs.ucla.edu \
    --to=eggert@cs.ucla.edu \
    --cc=eliz@gnu.org \
    --cc=emacs-devel@gnu.org \
    --cc=p.stephani2@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.savannah.gnu.org/cgit/emacs.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).