* [PATCH] Implement fast versions of json-parse functions
@ 2024-03-19 18:23 Herman, Géza
2024-03-19 18:47 ` Eli Zaretskii
0 siblings, 1 reply; 21+ messages in thread
From: Herman, Géza @ 2024-03-19 18:23 UTC (permalink / raw)
To: emacs-devel
[-- Attachment #1: Type: text/plain, Size: 247 bytes --]
Tags: patch
Here's the latest version of my JSON parser. It is the same as before;
the only modification is that this patch doesn't replace the
original parser, but keeps it and adds two additional functions
with the -fast suffix.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Implement-fast-versions-of-json-parse-functions.patch --]
[-- Type: text/patch, Size: 34292 bytes --]
From 48399572efbc16887f49fc5a0f20bed3b16b2115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A9za=20Herman?= <geza.herman@gmail.com>
Date: Tue, 19 Mar 2024 18:59:09 +0100
Subject: [PATCH] Implement fast versions of json-parse functions
---
src/json.c | 1108 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1108 insertions(+)
diff --git a/src/json.c b/src/json.c
index e849ccaf722..c08d9d4a4f7 100644
--- a/src/json.c
+++ b/src/json.c
@@ -23,6 +23,7 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <math.h>
#include <jansson.h>
@@ -1087,6 +1088,1099 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
return unbind_to (count, lisp);
}
+struct json_parser
+{
+ /* Because of a possible gap in the input (an Emacs buffer can have
+ a gap), the input is described by [input_begin;input_end) and
+ [secondary_input_begin;secondary_input_end). If the input is
+ contiguous, then secondary_input_begin and secondary_input_end
+ should be NULL */
+ const unsigned char *input_current;
+ const unsigned char *input_begin;
+ const unsigned char *input_end;
+
+ const unsigned char *secondary_input_begin;
+ const unsigned char *secondary_input_end;
+
+ int current_line;
+ int current_column;
+
+ /* The parser has a maximum allowed depth. available_depth
+ decreases at each object/array begin. If it reaches zero, an
+ error is signaled */
+ int available_depth;
+
+ struct json_configuration conf;
+
+ size_t additional_bytes_count;
+
+ /* Lisp_Objects are collected in this area during object/array
+ parsing */
+ Lisp_Object object_workspace;
+ size_t object_workspace_size;
+ size_t object_workspace_current;
+
+ /* String and number parsing uses this workspace */
+ unsigned char *byte_workspace;
+ unsigned char *byte_workspace_end;
+ unsigned char *byte_workspace_current;
+};
+
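The two input ranges exist because an Emacs buffer stores its text in two
contiguous chunks around the gap; the parser simply consumes the primary
range and then switches to the secondary one. A minimal standalone sketch
of that idea follows (illustrative only, not part of the patch; the names
here are invented):

#include <stdio.h>
#include <string.h>

/* Minimal two-range reader: consume the primary range, then switch to
   the secondary range, the way the parser handles a buffer gap.  */
struct two_range_reader
{
  const unsigned char *current, *end;
  const unsigned char *secondary, *secondary_end;
};

static int
reader_get (struct two_range_reader *r)
{
  if (r->current >= r->end)
    {
      if (r->secondary == NULL)
        return -1;                   /* end of input */
      r->current = r->secondary;     /* switch to the post-gap text */
      r->end = r->secondary_end;
      r->secondary = r->secondary_end = NULL;
    }
  return *r->current++;
}

int
main (void)
{
  const unsigned char a[] = "{\"key\": ";   /* text before the gap */
  const unsigned char b[] = "[1, 2]}";      /* text after the gap */
  struct two_range_reader r
    = { a, a + strlen ((const char *) a),
        b, b + strlen ((const char *) b) };
  for (int c; (c = reader_get (&r)) >= 0; )
    putchar (c);
  putchar ('\n');                           /* prints {"key": [1, 2]} */
  return 0;
}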
+static AVOID
+json_signal_error (struct json_parser *parser, Lisp_Object error)
+{
+ xsignal2 (error, INT_TO_INTEGER (parser->current_line),
+ INT_TO_INTEGER (parser->current_column));
+}
+
+static void
+json_parser_init (struct json_parser *parser,
+ struct json_configuration conf,
+ const unsigned char *input,
+ const unsigned char *input_end,
+ const unsigned char *secondary_input,
+ const unsigned char *secondary_input_end)
+{
+ const int initial_object_workspace_size = 64;
+ const int initial_string_workspace_size = 512;
+
+ if (secondary_input >= secondary_input_end)
+ {
+ secondary_input = NULL;
+ secondary_input_end = NULL;
+ }
+
+ if (input < input_end)
+ {
+ parser->input_begin = input;
+ parser->input_end = input_end;
+
+ parser->secondary_input_begin = secondary_input;
+ parser->secondary_input_end = secondary_input_end;
+ }
+ else
+ {
+ parser->input_begin = secondary_input;
+ parser->input_end = secondary_input_end;
+
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ }
+
+ parser->input_current = parser->input_begin;
+
+ parser->current_line = 1;
+ parser->current_column = 0;
+ parser->available_depth = 10000;
+ parser->conf = conf;
+
+ parser->additional_bytes_count = 0;
+
+ parser->object_workspace
+ = make_vector (initial_object_workspace_size, Qnil);
+ parser->object_workspace_size = initial_object_workspace_size;
+ parser->object_workspace_current = 0;
+
+ parser->byte_workspace = xmalloc (initial_string_workspace_size);
+ parser->byte_workspace_end
+ = parser->byte_workspace + initial_string_workspace_size;
+}
+
+static void
+json_parser_done (void *parser)
+{
+ struct json_parser *p = (struct json_parser *) parser;
+ xfree (p->byte_workspace);
+}
+
+/* Makes sure that the object_workspace has room for 'size' more
+ Lisp_Objects */
+NO_INLINE static void
+json_make_object_workspace_for_slow_path (struct json_parser *parser,
+ size_t size)
+{
+ size_t needed_workspace_size
+ = (parser->object_workspace_current + size);
+ size_t new_workspace_size = parser->object_workspace_size;
+ while (new_workspace_size < needed_workspace_size)
+ {
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+ }
+ Lisp_Object new_workspace = make_vector (new_workspace_size, Qnil);
+ for (size_t i = 0; i < parser->object_workspace_current; i++)
+ {
+ ASET (new_workspace, i, AREF (parser->object_workspace, i));
+ }
+ parser->object_workspace = new_workspace;
+ parser->object_workspace_size = new_workspace_size;
+}
+
+INLINE void
+json_make_object_workspace_for (struct json_parser *parser,
+ size_t size)
+{
+ if (parser->object_workspace_size - parser->object_workspace_current
+ < size)
+ {
+ json_make_object_workspace_for_slow_path (parser, size);
+ }
+}
+
+static void
+json_byte_workspace_reset (struct json_parser *parser)
+{
+ parser->byte_workspace_current = parser->byte_workspace;
+}
+
+/* Puts 'value' into the byte_workspace. If there is no space
+ available, it allocates space */
+NO_INLINE static void
+json_byte_workspace_put_slow_path (struct json_parser *parser,
+ unsigned char value)
+{
+ size_t new_workspace_size
+ = parser->byte_workspace_end - parser->byte_workspace;
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+
+ size_t offset
+ = parser->byte_workspace_current - parser->byte_workspace;
+ parser->byte_workspace
+ = xrealloc (parser->byte_workspace, new_workspace_size);
+ parser->byte_workspace_end
+ = parser->byte_workspace + new_workspace_size;
+ parser->byte_workspace_current = parser->byte_workspace + offset;
+ *parser->byte_workspace_current++ = value;
+}
+
+INLINE void
+json_byte_workspace_put (struct json_parser *parser,
+ unsigned char value)
+{
+ if (parser->byte_workspace_current < parser->byte_workspace_end)
+ {
+ *parser->byte_workspace_current++ = value;
+ }
+ else
+ {
+ json_byte_workspace_put_slow_path (parser, value);
+ }
+}
+
+static bool
+json_input_at_eof (struct json_parser *parser)
+{
+ if (parser->input_current < parser->input_end)
+ return false;
+ return parser->secondary_input_end == NULL;
+}
+
+/* If there is a secondary buffer, switch to it */
+static int
+json_input_switch_to_secondary (struct json_parser *parser)
+{
+ if (parser->secondary_input_begin < parser->secondary_input_end)
+ {
+ parser->additional_bytes_count
+ = parser->input_end - parser->input_begin;
+ parser->input_begin = parser->secondary_input_begin;
+ parser->input_end = parser->secondary_input_end;
+ parser->input_current = parser->secondary_input_begin;
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ return 0;
+ }
+ else
+ return -1;
+}
+
+/* Reads a byte from the JSON input stream */
+static unsigned char
+json_input_get (struct json_parser *parser)
+{
+ if (parser->input_current >= parser->input_end
+ && json_input_switch_to_secondary (parser) < 0)
+ json_signal_error (parser, Qjson_end_of_file);
+ return *parser->input_current++;
+}
+
+/* Reads a byte from the JSON input stream, if the stream is not at
+ * eof. At eof, returns -1 */
+static int
+json_input_get_if_possible (struct json_parser *parser)
+{
+ if (parser->input_current >= parser->input_end
+ && json_input_switch_to_secondary (parser) < 0)
+ return -1;
+ return *parser->input_current++;
+}
+
+/* Puts back the last read input byte. Only one byte can be put back,
+ because otherwise this code would need to handle switching from
+ the secondary buffer to the initial */
+static void
+json_input_put_back (struct json_parser *parser)
+{
+ parser->input_current--;
+}
+
+static bool
+json_skip_whitespace_internal (struct json_parser *parser, int c)
+{
+ parser->current_column++;
+ if (c == 0x20 || c == 0x09 || c == 0x0d)
+ return false;
+ else if (c == 0x0a)
+ {
+ parser->current_line++;
+ parser->current_column = 0;
+ return false;
+ }
+ else
+ return true;
+}
+
+/* Skips JSON whitespace, and returns the first non-whitespace
+ * character */
+static int
+json_skip_whitespace (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get (parser);
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+/* Skips JSON whitespace, and returns the first non-whitespace
+ * character, if possible. If there is no non-whitespace character
+ * (because we reached the end of input), it returns -1 */
+static int
+json_skip_whitespace_if_possible (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get_if_possible (parser);
+ if (c < 0)
+ return c;
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+static int
+json_hex_value (int c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ else if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ else
+ return -1;
+}
+
+/* Parses the CCCC part of the unicode escape sequence \uCCCC */
+static int
+json_parse_unicode (struct json_parser *parser)
+{
+ unsigned char v[4];
+ for (int i = 0; i < 4; i++)
+ {
+ int c = json_hex_value (json_input_get (parser));
+ parser->current_column++;
+ if (c < 0)
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ v[i] = c;
+ }
+
+ return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3];
+}
+
+/* Parses the continuation bytes of a UTF-8 code point encoding
+ (i.e. everything except the first byte), and returns the numeric
+ value accumulated from those bytes (without the first byte's bits) */
+static int
+json_handle_utf8_tail_bytes (struct json_parser *parser, int n)
+{
+ int v = 0;
+ for (int i = 0; i < n; i++)
+ {
+ int c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ if ((c & 0xc0) != 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ v = (v << 6) | (c & 0x3f);
+ }
+ return v;
+}
+
+/* Reads a JSON string, and puts the result into the byte workspace */
+static void
+json_parse_string (struct json_parser *parser)
+{
+ /* a single_uninteresting byte can simply be copied from the input
+ to the output; it doesn't need any extra care. This means all the
+ characters in [0x20;0x7f], except the double quote and
+ the backslash */
+ static const char is_single_uninteresting[256] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+ /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ for (;;)
+ {
+ /* This if is only here for a possible speedup. If there are 4
+ bytes available, and all of them are single_uninteresting,
+ then we can just copy these 4 bytes to output */
+ if (parser->input_end - parser->input_current >= 4)
+ {
+ int c0 = parser->input_current[0];
+ int c1 = parser->input_current[1];
+ int c2 = parser->input_current[2];
+ int c3 = parser->input_current[3];
+ bool v0 = is_single_uninteresting[c0];
+ bool v1 = is_single_uninteresting[c1];
+ bool v2 = is_single_uninteresting[c2];
+ bool v3 = is_single_uninteresting[c3];
+ if (v0 && v1 && v2 && v3)
+ {
+ json_byte_workspace_put (parser, c0);
+ json_byte_workspace_put (parser, c1);
+ json_byte_workspace_put (parser, c2);
+ json_byte_workspace_put (parser, c3);
+ parser->input_current += 4;
+ parser->current_column += 4;
+ continue;
+ }
+ }
+
+ int c = json_input_get (parser);
+ parser->current_column++;
+ if (is_single_uninteresting[c])
+ {
+ json_byte_workspace_put (parser, c);
+ continue;
+ }
+
+ if (c == '"')
+ return;
+ else if (c & 0x80)
+ {
+ /* Handle utf-8 encoding */
+ json_byte_workspace_put (parser, c);
+ if (c < 0xc0)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ else if (c < 0xe0)
+ {
+ int n = ((c & 0x1f) << 6
+ | json_handle_utf8_tail_bytes (parser, 1));
+ if (n < 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf0)
+ {
+ int n = ((c & 0xf) << 12
+ | json_handle_utf8_tail_bytes (parser, 2));
+ if (n < 0x800 || (n >= 0xd800 && n < 0xe000))
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf8)
+ {
+ int n = ((c & 0x7) << 18
+ | json_handle_utf8_tail_bytes (parser, 3));
+ if (n < 0x10000 || n > 0x10ffff)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c == '\\')
+ {
+ /* Handle escape sequences */
+ c = json_input_get (parser);
+ parser->current_column++;
+ if (c == '"')
+ json_byte_workspace_put (parser, '"');
+ else if (c == '\\')
+ json_byte_workspace_put (parser, '\\');
+ else if (c == '/')
+ json_byte_workspace_put (parser, '/');
+ else if (c == 'b')
+ json_byte_workspace_put (parser, '\b');
+ else if (c == 'f')
+ json_byte_workspace_put (parser, '\f');
+ else if (c == 'n')
+ json_byte_workspace_put (parser, '\n');
+ else if (c == 'r')
+ json_byte_workspace_put (parser, '\r');
+ else if (c == 't')
+ json_byte_workspace_put (parser, '\t');
+ else if (c == 'u')
+ {
+ int num = json_parse_unicode (parser);
+ /* is the first half of the surrogate pair */
+ if (num >= 0xd800 && num < 0xdc00)
+ {
+ parser->current_column++;
+ if (json_input_get (parser) != '\\')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ parser->current_column++;
+ if (json_input_get (parser) != 'u')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ int num2 = json_parse_unicode (parser);
+ if (num2 < 0xdc00 || num2 >= 0xe000)
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ num = (0x10000
+ + ((num - 0xd800) << 10 | (num2 - 0xdc00)));
+ }
+ else if (num >= 0xdc00 && num < 0xe000)
+ /* is the second half of the surrogate pair without
+ the first half */
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+
+ /* utf-8 encode the code-point */
+ if (num < 0x80)
+ json_byte_workspace_put (parser, num);
+ else if (num < 0x800)
+ {
+ json_byte_workspace_put (parser, 0xc0 | num >> 6);
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else if (num < 0x10000)
+ {
+ json_byte_workspace_put (parser, 0xe0 | num >> 12);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else
+ {
+ json_byte_workspace_put (parser, 0xf0 | num >> 18);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 12) & 0x3f)));
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ }
+ else
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ }
+ else
+ json_signal_error (parser, Qjson_parse_error);
+ }
+}
+
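To make the escape handling above concrete: a non-BMP character such as
U+1F600 arrives as the escape pair \uD83D\uDE00, gets combined into a
single code point, and is then re-encoded as four UTF-8 bytes. A small
standalone sketch of that arithmetic, following the same formulas as
json_parse_string above (illustrative only, not part of the patch):

#include <stdio.h>

int
main (void)
{
  int hi = 0xd83d, lo = 0xde00;          /* \uD83D\uDE00 */
  /* Combine the surrogate pair into one code point.  */
  int num = 0x10000 + ((hi - 0xd800) << 10 | (lo - 0xdc00));
  printf ("code point: U+%X\n", num);    /* U+1F600 */
  /* UTF-8 encode the code point (four-byte form).  */
  unsigned char out[4] = {
    0xf0 | num >> 18,
    0x80 | ((num >> 12) & 0x3f),
    0x80 | ((num >> 6) & 0x3f),
    0x80 | (num & 0x3f),
  };
  for (int i = 0; i < 4; i++)
    printf ("%02x ", out[i]);            /* f0 9f 98 80 */
  printf ("\n");
  return 0;
}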
+/* If there was no integer overflow while parsing the integer, this
+ returns 'value' directly. Otherwise it calls string_to_number to
+ parse the integer from the byte workspace. This could just always
+ call string_to_number, but for performance reasons the parsing code
+ tries to calculate the value on the fly, so in most cases the call
+ to string_to_number can be avoided */
+static Lisp_Object
+json_create_integer (struct json_parser *parser,
+ bool integer_overflow, bool negative,
+ EMACS_UINT value)
+{
+ if (!integer_overflow)
+ {
+ if (negative)
+ {
+ uintmax_t v = value;
+ if (v <= (uintmax_t) INTMAX_MAX + 1)
+ return INT_TO_INTEGER ((intmax_t) -v);
+ }
+ else
+ return INT_TO_INTEGER (value);
+ }
+
+ json_byte_workspace_put (parser, 0);
+ ptrdiff_t len;
+ Lisp_Object result
+ = string_to_number ((const char *) parser->byte_workspace, 10,
+ &len);
+ if (len
+ != parser->byte_workspace_current - parser->byte_workspace - 1)
+ json_signal_error (parser, Qjson_error);
+ return result;
+}
+
+/* Parses a float using the byte workspace */
+static Lisp_Object
+json_create_float (struct json_parser *parser)
+{
+ json_byte_workspace_put (parser, 0);
+ errno = 0;
+ char *e;
+ double value = strtod ((const char *) parser->byte_workspace, &e);
+ bool out_of_range
+ = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL));
+ if (out_of_range)
+ json_signal_error (parser, Qjson_number_out_of_range);
+ else if ((const unsigned char *) e
+ != parser->byte_workspace_current - 1)
+ json_signal_error (parser, Qjson_error);
+ else
+ return make_float (value);
+}
+
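The out-of-range test above relies on strtod's standard behavior: on
overflow it returns plus or minus HUGE_VAL and sets errno (to ERANGE).
A standalone illustration of that check (not part of the patch):

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  const char *text = "1e999";   /* far beyond the double range */
  errno = 0;
  char *end;
  double value = strtod (text, &end);
  /* Same out-of-range criterion as json_create_float above.  */
  if (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL))
    printf ("out of range (errno=%d)\n", errno);
  else
    printf ("parsed %g, %td byte(s) consumed\n", value, end - text);
  return 0;
}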
+/* Parses a number. The first character is the input parameter 'c'.
+ */
+static Lisp_Object
+json_parse_number (struct json_parser *parser, int c)
+{
+ json_byte_workspace_reset (parser);
+ json_byte_workspace_put (parser, c);
+
+ bool negative = false;
+ if (c == '-')
+ {
+ negative = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+
+ /* The idea is that while looking for the last character of the
+ number, the loop below also tries to calculate the value. If
+ the parsed number is an integer which fits into EMACS_UINT,
+ then the parser can use the value of 'integer' right away,
+ instead of having to re-parse the byte workspace later.
+ Ideally, this integer should have the same size as a CPU general
+ purpose register. */
+ EMACS_UINT integer = c - '0';
+ bool integer_overflow = false;
+
+ if (integer == 0)
+ {
+ if (json_input_at_eof (parser))
+ return INT_TO_INTEGER (0);
+ c = json_input_get (parser);
+ }
+ else
+ {
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_integer (parser, integer_overflow,
+ negative, integer);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ integer_overflow |= ckd_mul (&integer, integer, 10);
+ integer_overflow |= ckd_add (&integer, integer, c - '0');
+ }
+ }
+
+ bool is_float = false;
+ if (c == '.')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+ if (c == 'e' || c == 'E')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c == '-' || c == '+')
+ {
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+
+ /* 'c' contains a character which is not part of the number,
+ so it needs to be put back */
+ json_input_put_back (parser);
+
+ if (is_float)
+ return json_create_float (parser);
+ else
+ return json_create_integer (parser, integer_overflow, negative,
+ integer);
+}
+
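The overflow flag used above comes from the C23 checked-arithmetic macros
ckd_mul/ckd_add (presumably provided to Emacs via Gnulib's stdckdint.h).
A standalone sketch of the digit-accumulation idea, assuming
<stdckdint.h> is available (illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdckdint.h>
#include <stdint.h>
#include <stdio.h>

/* Accumulate decimal digits while tracking overflow, the way
   json_parse_number does; on overflow the caller would fall back to a
   slower path (string_to_number in the patch).  */
static bool
accumulate (const char *digits, uintmax_t *out)
{
  uintmax_t value = 0;
  bool overflow = false;
  for (const char *p = digits; *p >= '0' && *p <= '9'; p++)
    {
      overflow |= ckd_mul (&value, value, 10);
      overflow |= ckd_add (&value, value, *p - '0');
    }
  *out = value;
  return !overflow;
}

int
main (void)
{
  uintmax_t v;
  if (accumulate ("90071992547409920", &v))
    printf ("fits: %ju\n", v);
  else
    printf ("overflowed, would re-parse with a slower routine\n");
  return 0;
}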
+static Lisp_Object json_parse_value (struct json_parser *parser,
+ int c);
+
+/* Parses a JSON array. */
+static Lisp_Object
+json_parse_array (struct json_parser *parser)
+{
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+
+ if (c != ']')
+ {
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ size_t number_of_elements = 0;
+ /* This loop collects the array elements in the object workspace
+ */
+ for (;;)
+ {
+ Lisp_Object element = json_parse_value (parser, c);
+ json_make_object_workspace_for (parser, 1);
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, element);
+ parser->object_workspace_current++;
+
+ c = json_skip_whitespace (parser);
+
+ number_of_elements++;
+ if (c == ']')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ Lisp_Object result;
+ size_t number_of_elements
+ = parser->object_workspace_current - first;
+
+ switch (parser->conf.array_type)
+ {
+ case json_array_array:
+ result = make_vector (number_of_elements, Qnil);
+ for (size_t i = 0; i < number_of_elements; i++)
+ {
+ rarely_quit (i);
+ ASET (result, i,
+ AREF (parser->object_workspace, first + i));
+ }
+ break;
+ case json_array_list:
+ result = Qnil;
+ for (size_t i = 0; i < number_of_elements; ++i)
+ {
+ rarely_quit (i);
+ result
+ = Fcons (AREF (parser->object_workspace,
+ parser->object_workspace_current - i - 1),
+ result);
+ }
+ break;
+ default:
+ emacs_abort ();
+ }
+
+ parser->object_workspace_current = first;
+
+ return result;
+}
+
+/* Parses a JSON object. */
+static Lisp_Object
+json_parse_object (struct json_parser *parser)
+{
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+
+ if (c != '}')
+ {
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ /* This loop collects the object members (key/value pairs) in
+ * the object workspace */
+ for (;;)
+ {
+ if (c != '"')
+ json_signal_error (parser, Qjson_parse_error);
+
+ Lisp_Object key;
+ json_byte_workspace_reset (parser);
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
+ {
+ json_parse_string (parser);
+ key
+ = make_string_from_utf8 ((char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ break;
+ }
+ case json_object_alist:
+ {
+ json_parse_string (parser);
+ key = Fintern (make_string_from_utf8 (
+ (char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace)),
+ Qnil);
+ break;
+ }
+ case json_object_plist:
+ {
+ json_byte_workspace_put (parser, ':');
+ json_parse_string (parser);
+ key = intern_1 ((char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ break;
+ }
+ default:
+ emacs_abort ();
+ }
+
+ c = json_skip_whitespace (parser);
+ if (c != ':')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+
+ Lisp_Object value = json_parse_value (parser, c);
+
+ json_make_object_workspace_for (parser, 2);
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, key);
+ parser->object_workspace_current++;
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, value);
+ parser->object_workspace_current++;
+
+ c = json_skip_whitespace (parser);
+
+ if (c == '}')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ Lisp_Object result;
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
+ {
+ result
+ = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize,
+ make_fixed_natnum (
+ (parser->object_workspace_current - first) / 2));
+ struct Lisp_Hash_Table *h = XHASH_TABLE (result);
+ for (size_t i = first; i < parser->object_workspace_current;
+ i += 2)
+ {
+ hash_hash_t hash;
+ Lisp_Object key = AREF (parser->object_workspace, i);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i + 1);
+ ptrdiff_t j = hash_lookup_get_hash (h, key, &hash);
+ if (j < 0)
+ hash_put (h, key, value, hash);
+ else
+ set_hash_value_slot (h, j, value);
+ }
+ break;
+ }
+ case json_object_alist:
+ {
+ result = Qnil;
+ for (size_t i = parser->object_workspace_current; i > first;
+ i -= 2)
+ {
+ Lisp_Object key = AREF (parser->object_workspace, i - 2);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i - 1);
+ result = Fcons (Fcons (key, value), result);
+ }
+ break;
+ }
+ case json_object_plist:
+ {
+ result = Qnil;
+ for (size_t i = parser->object_workspace_current; i > first;
+ i -= 2)
+ {
+ Lisp_Object key = AREF (parser->object_workspace, i - 2);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i - 1);
+ result = Fcons (value, result);
+ result = Fcons (key, result);
+ }
+ break;
+ }
+ default:
+ emacs_abort ();
+ }
+
+ parser->object_workspace_current = first;
+
+ return result;
+}
+
+/* "Token char" is not JSON terminology. When parsing
+ null/false/true, this function defines the character set that needs
+ to be considered part of a token. For example, if the input is
+ "truesomething", then the parser shouldn't treat it as "true"
+ followed by a separate "something" token. Another example:
+ if the input is "truetrue", then calling (json-parse-buffer) twice
+ shouldn't produce two successful calls which return t, but a
+ parsing error */
+static bool
+json_is_token_char (int c)
+{
+ return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9') || (c == '-'));
+}
+
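A tiny standalone illustration of the lookahead described in the comment
above: after matching the four bytes of "true", the next byte must not
itself be a token character, otherwise inputs like "truetrue" or
"truesomething" would be accepted. (Illustrative only, not part of the
patch; the helper names are invented.)

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool
is_token_char (int c)
{
  return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
          || (c >= '0' && c <= '9') || c == '-');
}

/* Accept "true" only when it is not immediately followed by another
   token character.  */
static bool
accepts_true_literal (const char *input)
{
  if (strncmp (input, "true", 4) != 0)
    return false;
  return !is_token_char ((unsigned char) input[4]);
}

int
main (void)
{
  printf ("\"true\"     -> %d\n", accepts_true_literal ("true"));     /* 1 */
  printf ("\"true, 1\"  -> %d\n", accepts_true_literal ("true, 1"));  /* 1 */
  printf ("\"truetrue\" -> %d\n", accepts_true_literal ("truetrue")); /* 0 */
  return 0;
}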
+/* This is the entry point of the value parser; it parses a JSON
+ * value */
+Lisp_Object
+json_parse_value (struct json_parser *parser, int c)
+{
+ if (c == '{')
+ return json_parse_object (parser);
+ else if (c == '[')
+ return json_parse_array (parser);
+ else if (c == '"')
+ {
+ json_byte_workspace_reset (parser);
+ json_parse_string (parser);
+ Lisp_Object result
+ = make_string_from_utf8 ((const char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ return result;
+ }
+ else if ((c >= '0' && c <= '9') || (c == '-'))
+ return json_parse_number (parser, c);
+ else
+ {
+ int c2 = json_input_get (parser);
+ int c3 = json_input_get (parser);
+ int c4 = json_input_get (parser);
+ int c5 = json_input_get_if_possible (parser);
+
+ if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 4;
+ return Qt;
+ }
+ if (c == 'n' && c2 == 'u' && c3 == 'l' && c4 == 'l'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 4;
+ return parser->conf.null_object;
+ }
+ if (c == 'f' && c2 == 'a' && c3 == 'l' && c4 == 's'
+ && c5 == 'e')
+ {
+ int c6 = json_input_get_if_possible (parser);
+ if (c6 < 0 || !json_is_token_char (c6))
+ {
+ if (c6 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 5;
+ return parser->conf.false_object;
+ }
+ }
+
+ json_signal_error (parser, Qjson_parse_error);
+ }
+}
+
+enum ParseEndBehavior
+ {
+ PARSEENDBEHAVIOR_CheckForGarbage,
+ PARSEENDBEHAVIOR_MovePoint
+ };
+
+static Lisp_Object
+json_parse (struct json_parser *parser,
+ enum ParseEndBehavior parse_end_behavior)
+{
+ int c = json_skip_whitespace (parser);
+
+ Lisp_Object result = json_parse_value (parser, c);
+
+ switch (parse_end_behavior)
+ {
+ case PARSEENDBEHAVIOR_CheckForGarbage:
+ c = json_skip_whitespace_if_possible (parser);
+ if (c >= 0)
+ json_signal_error (parser, Qjson_trailing_content);
+ break;
+ case PARSEENDBEHAVIOR_MovePoint:
+ {
+ ptrdiff_t point
+ = (PT_BYTE + parser->input_current - parser->input_begin
+ + parser->additional_bytes_count);
+ SET_PT_BOTH (BYTE_TO_CHAR (point), point);
+ break;
+ }
+ }
+
+ return result;
+}
+
+DEFUN ("json-parse-string-fast", Fjson_parse_string_fast,
+ Sjson_parse_string_fast, 1, MANY, NULL,
+ doc: /* Parse the JSON STRING into a Lisp object.
+This is an experimental function; it is a faster implementation of
+`json-parse-string'.  The functionality is the same, the only
+difference being how this version handles duplicate keys: if
+:object-type is `alist' or `plist', json-parse-string-fast does not
+filter out duplicate keys. */)
+(ptrdiff_t nargs, Lisp_Object *args)
+{
+ specpdl_ref count = SPECPDL_INDEX ();
+
+ Lisp_Object string = args[0];
+ CHECK_STRING (string);
+ Lisp_Object encoded = json_encode (string);
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
+ json_parse_args (nargs - 1, args + 1, &conf, true);
+
+ struct json_parser p;
+ const unsigned char *begin
+ = (const unsigned char *) SSDATA (encoded);
+ json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL,
+ NULL);
+ record_unwind_protect_ptr (json_parser_done, &p);
+
+ return unbind_to (count,
+ json_parse (&p,
+ PARSEENDBEHAVIOR_CheckForGarbage));
+}
+
+DEFUN ("json-parse-buffer-fast", Fjson_parse_buffer_fast,
+ Sjson_parse_buffer_fast, 0, MANY, NULL,
+ doc: /* Read JSON object from current buffer starting at point.
+This is an experimental function; it is a faster implementation of
+`json-parse-buffer'.  The functionality is the same, the only
+difference being how this version handles duplicate keys: if
+:object-type is `alist' or `plist', json-parse-buffer-fast does not
+filter out duplicate keys. */)
+(ptrdiff_t nargs, Lisp_Object *args)
+{
+ specpdl_ref count = SPECPDL_INDEX ();
+
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
+ json_parse_args (nargs, args, &conf, true);
+
+ struct json_parser p;
+ unsigned char *begin = PT_ADDR;
+ unsigned char *end = GPT_ADDR;
+ unsigned char *secondary_begin = NULL;
+ unsigned char *secondary_end = NULL;
+ if (GPT_ADDR < Z_ADDR)
+ {
+ secondary_begin = GAP_END_ADDR;
+ if (secondary_begin < PT_ADDR)
+ secondary_begin = PT_ADDR;
+ secondary_end = Z_ADDR;
+ }
+
+ json_parser_init (&p, conf, begin, end, secondary_begin,
+ secondary_end);
+ record_unwind_protect_ptr (json_parser_done, &p);
+
+ return unbind_to (count,
+ json_parse (&p, PARSEENDBEHAVIOR_MovePoint));
+}
+
void
syms_of_json (void)
{
@@ -1102,6 +2196,10 @@ syms_of_json (void)
DEFSYM (Qjson_end_of_file, "json-end-of-file");
DEFSYM (Qjson_trailing_content, "json-trailing-content");
DEFSYM (Qjson_object_too_deep, "json-object-too-deep");
+ DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error");
+ DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error");
+ DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error");
+ DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error");
DEFSYM (Qjson_unavailable, "json-unavailable");
define_error (Qjson_error, "generic JSON error", Qerror);
define_error (Qjson_out_of_memory,
@@ -1113,6 +2211,14 @@ syms_of_json (void)
Qjson_parse_error);
define_error (Qjson_object_too_deep,
"object cyclic or Lisp evaluation too deep", Qjson_error);
+ define_error (Qjson_utf8_decode_error,
+ "invalid utf-8 encoding", Qjson_error);
+ define_error (Qjson_invalid_surrogate_error,
+ "invalid surrogate pair", Qjson_error);
+ define_error (Qjson_number_out_of_range,
+ "number out of range", Qjson_error);
+ define_error (Qjson_escape_sequence_error,
+ "invalid escape sequence", Qjson_parse_error);
DEFSYM (Qpure, "pure");
DEFSYM (Qside_effect_free, "side-effect-free");
@@ -1137,4 +2243,6 @@ syms_of_json (void)
defsubr (&Sjson_insert);
defsubr (&Sjson_parse_string);
defsubr (&Sjson_parse_buffer);
+ defsubr (&Sjson_parse_string_fast);
+ defsubr (&Sjson_parse_buffer_fast);
}
--
2.42.0
* Re: [PATCH] Implement fast versions of json-parse functions
2024-03-19 18:23 [PATCH] Implement fast versions of json-parse functions Herman, Géza
@ 2024-03-19 18:47 ` Eli Zaretskii
2024-03-19 18:50 ` Herman, Géza
0 siblings, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-19 18:47 UTC (permalink / raw)
To: Géza Herman; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Date: Tue, 19 Mar 2024 19:23:36 +0100
>
> Here's the latest version of my JSON parser. It is the same as before;
> the only modification is that this patch doesn't replace the
> original parser, but keeps it and adds two additional functions
> with the -fast suffix.
Thanks, but I don't see why we would want to have two sets of these
functions. So please rework the patch to replace the existing
functions with ones that are based on your parser. This will leave us
with libjansson for producing JSON from Lisp, which is not optimal,
but still worth it, I think.
* Re: [PATCH] Implement fast versions of json-parse functions
2024-03-19 18:47 ` Eli Zaretskii
@ 2024-03-19 18:50 ` Herman, Géza
2024-03-19 19:28 ` Eli Zaretskii
0 siblings, 1 reply; 21+ messages in thread
From: Herman, Géza @ 2024-03-19 18:50 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, emacs-devel
[-- Attachment #1: Type: text/plain, Size: 1164 bytes --]
Eli Zaretskii <eliz@gnu.org> writes:
>> From: Herman, Géza <geza.herman@gmail.com>
>> Date: Tue, 19 Mar 2024 19:23:36 +0100
>>
>> Here's the latest version of my JSON parser. It is the same as
>> before; the only modification is that this patch doesn't replace
>> the original parser, but keeps it and adds two additional
>> functions with the -fast suffix.
>
> Thanks, but I don't see why we would want to have two sets of these
> functions. So please rework the patch to replace the existing
> functions with ones that are based on your parser. This will leave
> us with libjansson for producing JSON from Lisp, which is not
> optimal, but still worth it, I think.
I created this because you previously said: "That's okay, we can
start by making this an optional feature, and consider making it the
default after a couple of major releases; meanwhile, any problems
will be detected and reported."
I found this way safer, but I also don't have a problem with the
replacement approach. Actually, the last GitHub link I sent already
works that way. Here is the patch for that version.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-replace-jansson-parser-with-a-custom-one.patch --]
[-- Type: text/x-diff, Size: 43395 bytes --]
From 12decaddc9b8260745be4d5782fea21f4578eab2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A9za=20Herman?= <geza.herman@gmail.com>
Date: Wed, 6 Mar 2024 13:14:50 +0100
Subject: [PATCH] replace jansson parser with a custom one
---
src/json.c | 1306 ++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 1064 insertions(+), 242 deletions(-)
diff --git a/src/json.c b/src/json.c
index e849ccaf722..84e52b39d9d 100644
--- a/src/json.c
+++ b/src/json.c
@@ -23,6 +23,7 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <math.h>
#include <jansson.h>
@@ -237,41 +238,6 @@ json_out_of_memory (void)
xsignal0 (Qjson_out_of_memory);
}
-/* Signal a Lisp error corresponding to the JSON ERROR. */
-
-static AVOID
-json_parse_error (const json_error_t *error)
-{
- Lisp_Object symbol;
-#if JSON_HAS_ERROR_CODE
- switch (json_error_code (error))
- {
- case json_error_premature_end_of_input:
- symbol = Qjson_end_of_file;
- break;
- case json_error_end_of_input_expected:
- symbol = Qjson_trailing_content;
- break;
- default:
- symbol = Qjson_parse_error;
- break;
- }
-#else
- if (json_has_suffix (error->text, "expected near end of file"))
- symbol = Qjson_end_of_file;
- else if (json_has_prefix (error->text, "end of file expected"))
- symbol = Qjson_trailing_content;
- else
- symbol = Qjson_parse_error;
-#endif
- xsignal (symbol,
- list5 (build_string_from_utf8 (error->text),
- build_string_from_utf8 (error->source),
- INT_TO_INTEGER (error->line),
- INT_TO_INTEGER (error->column),
- INT_TO_INTEGER (error->position)));
-}
-
static void
json_release_object (void *object)
{
@@ -794,145 +760,1029 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
return unbind_to (count, Qnil);
}
-/* Convert a JSON object to a Lisp object. */
+struct json_parser
+{
+ /* Because of a possible gap in the input (an emacs buffer can have
+ a gap), the input is described by [input_begin;input_end) and
+ [secondary_input_begin;secondary_input_end). If the input is
+ continuous, then secondary_input_begin and secondary_input_end
+ should be NULL */
+ const unsigned char *input_current;
+ const unsigned char *input_begin;
+ const unsigned char *input_end;
+
+ const unsigned char *secondary_input_begin;
+ const unsigned char *secondary_input_end;
+
+ int current_line;
+ int current_column;
+
+ /* The parser has a maximum allowed depth. available_depth
+ decreases at each object/array begin. If it reaches zero, an
+ error is signaled */
+ int available_depth;
+
+ struct json_configuration conf;
+
+ size_t additional_bytes_count;
+
+ /* Lisp_Objects are collected in this area during object/array
+ parsing */
+ Lisp_Object object_workspace;
+ size_t object_workspace_size;
+ size_t object_workspace_current;
+
+ /* String and number parsing uses this workspace */
+ unsigned char *byte_workspace;
+ unsigned char *byte_workspace_end;
+ unsigned char *byte_workspace_current;
+};
+
+static AVOID
+json_signal_error (struct json_parser *parser, Lisp_Object error)
+{
+ xsignal2 (error, INT_TO_INTEGER (parser->current_line),
+ INT_TO_INTEGER (parser->current_column));
+}
+
+static void
+json_parser_init (struct json_parser *parser,
+ struct json_configuration conf,
+ const unsigned char *input,
+ const unsigned char *input_end,
+ const unsigned char *secondary_input,
+ const unsigned char *secondary_input_end)
+{
+ const int initial_object_workspace_size = 64;
+ const int initial_string_workspace_size = 512;
+
+ if (secondary_input >= secondary_input_end)
+ {
+ secondary_input = NULL;
+ secondary_input_end = NULL;
+ }
+
+ if (input < input_end)
+ {
+ parser->input_begin = input;
+ parser->input_end = input_end;
+
+ parser->secondary_input_begin = secondary_input;
+ parser->secondary_input_end = secondary_input_end;
+ }
+ else
+ {
+ parser->input_begin = secondary_input;
+ parser->input_end = secondary_input_end;
+
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ }
+
+ parser->input_current = parser->input_begin;
+
+ parser->current_line = 1;
+ parser->current_column = 0;
+ parser->available_depth = 10000;
+ parser->conf = conf;
+
+ parser->additional_bytes_count = 0;
+
+ parser->object_workspace
+ = make_vector (initial_object_workspace_size, Qnil);
+ parser->object_workspace_size = initial_object_workspace_size;
+ parser->object_workspace_current = 0;
+
+ parser->byte_workspace = xmalloc (initial_string_workspace_size);
+ parser->byte_workspace_end
+ = parser->byte_workspace + initial_string_workspace_size;
+}
+
+static void
+json_parser_done (void *parser)
+{
+ struct json_parser *p = (struct json_parser *) parser;
+ xfree (p->byte_workspace);
+}
+
+/* Makes sure that the object_workspace has 'size' available space for
+ Lisp_Objects */
+NO_INLINE static void
+json_make_object_workspace_for_slow_path (struct json_parser *parser,
+ size_t size)
+{
+ size_t needed_workspace_size
+ = (parser->object_workspace_current + size);
+ size_t new_workspace_size = parser->object_workspace_size;
+ while (new_workspace_size < needed_workspace_size)
+ {
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+ }
+ Lisp_Object new_workspace = make_vector (new_workspace_size, Qnil);
+ for (size_t i = 0; i < parser->object_workspace_current; i++)
+ {
+ ASET (new_workspace, i, AREF (parser->object_workspace, i));
+ }
+ parser->object_workspace = new_workspace;
+ parser->object_workspace_size = new_workspace_size;
+}
+
+INLINE void
+json_make_object_workspace_for (struct json_parser *parser,
+ size_t size)
+{
+ if (parser->object_workspace_size - parser->object_workspace_current
+ < size)
+ {
+ json_make_object_workspace_for_slow_path (parser, size);
+ }
+}
+
+static void
+json_byte_workspace_reset (struct json_parser *parser)
+{
+ parser->byte_workspace_current = parser->byte_workspace;
+}
+
+/* Puts 'value' into the byte_workspace. If there is no space
+ available, it allocates space */
+NO_INLINE static void
+json_byte_workspace_put_slow_path (struct json_parser *parser,
+ unsigned char value)
+{
+ size_t new_workspace_size
+ = parser->byte_workspace_end - parser->byte_workspace;
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+
+ size_t offset
+ = parser->byte_workspace_current - parser->byte_workspace;
+ parser->byte_workspace
+ = xrealloc (parser->byte_workspace, new_workspace_size);
+ parser->byte_workspace_end
+ = parser->byte_workspace + new_workspace_size;
+ parser->byte_workspace_current = parser->byte_workspace + offset;
+ *parser->byte_workspace_current++ = value;
+}
+
+INLINE void
+json_byte_workspace_put (struct json_parser *parser,
+ unsigned char value)
+{
+ if (parser->byte_workspace_current < parser->byte_workspace_end)
+ {
+ *parser->byte_workspace_current++ = value;
+ }
+ else
+ {
+ json_byte_workspace_put_slow_path (parser, value);
+ }
+}
+
+static bool
+json_input_at_eof (struct json_parser *parser)
+{
+ if (parser->input_current < parser->input_end)
+ return false;
+ return parser->secondary_input_end == NULL;
+}
+
+/* If there is a secondary buffer, it switches to it */
+static int
+json_input_switch_to_secondary (struct json_parser *parser)
+{
+ if (parser->secondary_input_begin < parser->secondary_input_end)
+ {
+ parser->additional_bytes_count
+ = parser->input_end - parser->input_begin;
+ parser->input_begin = parser->secondary_input_begin;
+ parser->input_end = parser->secondary_input_end;
+ parser->input_current = parser->secondary_input_begin;
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ return 0;
+ }
+ else
+ return -1;
+}
+
+/* Reads a byte from the JSON input stream */
+static unsigned char
+json_input_get (struct json_parser *parser)
+{
+ if (parser->input_current >= parser->input_end
+ && json_input_switch_to_secondary (parser) < 0)
+ json_signal_error (parser, Qjson_end_of_file);
+ return *parser->input_current++;
+}
+
+/* Reads a byte from the JSON input stream, if the stream is not at
+ * eof. At eof, returns -1 */
+static int
+json_input_get_if_possible (struct json_parser *parser)
+{
+ if (parser->input_current >= parser->input_end
+ && json_input_switch_to_secondary (parser) < 0)
+ return -1;
+ return *parser->input_current++;
+}
+
+/* Puts back the last read input byte. Only one byte can be put back,
+ because otherwise this code would need to handle switching from
+ the secondary buffer to the initial */
+static void
+json_input_put_back (struct json_parser *parser)
+{
+ parser->input_current--;
+}
+
+static bool
+json_skip_whitespace_internal (struct json_parser *parser, int c)
+{
+ parser->current_column++;
+ if (c == 0x20 || c == 0x09 || c == 0x0d)
+ return false;
+ else if (c == 0x0a)
+ {
+ parser->current_line++;
+ parser->current_column = 0;
+ return false;
+ }
+ else
+ return true;
+}
+
+/* Skips JSON whitespace, and returns with the first non-whitespace
+ * character */
+static int
+json_skip_whitespace (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get (parser);
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+/* Skips JSON whitespace, and returns with the first non-whitespace
+ * character, if possible. If there is no non-whitespace character
+ * (because we reached the end), it returns -1 */
+static int
+json_skip_whitespace_if_possible (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get_if_possible (parser);
+ if (c < 0)
+ return c;
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+static int
+json_hex_value (int c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ else if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ else
+ return -1;
+}
+
+/* Parses the CCCC part of the unicode escape sequence \uCCCC */
+static int
+json_parse_unicode (struct json_parser *parser)
+{
+ unsigned char v[4];
+ for (int i = 0; i < 4; i++)
+ {
+ int c = json_hex_value (json_input_get (parser));
+ parser->current_column++;
+ if (c < 0)
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ v[i] = c;
+ }
+
+ return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3];
+}
+
+/* Parses an utf-8 code-point encoding (except the first byte), and
+ returns the numeric value of the code-point (without considering
+ the first byte) */
+static int
+json_handle_utf8_tail_bytes (struct json_parser *parser, int n)
+{
+ int v = 0;
+ for (int i = 0; i < n; i++)
+ {
+ int c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ if ((c & 0xc0) != 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ v = (v << 6) | (c & 0x3f);
+ }
+ return v;
+}
+
+/* Reads a JSON string, and puts the result into the byte workspace */
+static void
+json_parse_string (struct json_parser *parser)
+{
+ /* a single_uninteresting byte can be simply copied from the input
+ to output, it doesn't need any extra care. This means all the
+ characters between [0x20;0x7f], except the double quote and
+ the backslash */
+ static const char is_single_uninteresting[256] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+ /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ for (;;)
+ {
+ /* This if is only here for a possible speedup. If there are 4
+ bytes available, and all of them are single_uninteresting,
+ then we can just copy these 4 bytes to output */
+ if (parser->input_end - parser->input_current >= 4)
+ {
+ int c0 = parser->input_current[0];
+ int c1 = parser->input_current[1];
+ int c2 = parser->input_current[2];
+ int c3 = parser->input_current[3];
+ bool v0 = is_single_uninteresting[c0];
+ bool v1 = is_single_uninteresting[c1];
+ bool v2 = is_single_uninteresting[c2];
+ bool v3 = is_single_uninteresting[c3];
+ if (v0 && v1 && v2 && v3)
+ {
+ json_byte_workspace_put (parser, c0);
+ json_byte_workspace_put (parser, c1);
+ json_byte_workspace_put (parser, c2);
+ json_byte_workspace_put (parser, c3);
+ parser->input_current += 4;
+ parser->current_column += 4;
+ continue;
+ }
+ }
+
+ int c = json_input_get (parser);
+ parser->current_column++;
+ if (is_single_uninteresting[c])
+ {
+ json_byte_workspace_put (parser, c);
+ continue;
+ }
+
+ if (c == '"')
+ return;
+ else if (c & 0x80)
+ {
+ /* Handle utf-8 encoding */
+ json_byte_workspace_put (parser, c);
+ if (c < 0xc0)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ else if (c < 0xe0)
+ {
+ int n = ((c & 0x1f) << 6
+ | json_handle_utf8_tail_bytes (parser, 1));
+ if (n < 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf0)
+ {
+ int n = ((c & 0xf) << 12
+ | json_handle_utf8_tail_bytes (parser, 2));
+ if (n < 0x800 || (n >= 0xd800 && n < 0xe000))
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf8)
+ {
+ int n = ((c & 0x7) << 18
+ | json_handle_utf8_tail_bytes (parser, 3));
+ if (n < 0x10000 || n > 0x10ffff)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c == '\\')
+ {
+ /* Handle escape sequences */
+ c = json_input_get (parser);
+ parser->current_column++;
+ if (c == '"')
+ json_byte_workspace_put (parser, '"');
+ else if (c == '\\')
+ json_byte_workspace_put (parser, '\\');
+ else if (c == '/')
+ json_byte_workspace_put (parser, '/');
+ else if (c == 'b')
+ json_byte_workspace_put (parser, '\b');
+ else if (c == 'f')
+ json_byte_workspace_put (parser, '\f');
+ else if (c == 'n')
+ json_byte_workspace_put (parser, '\n');
+ else if (c == 'r')
+ json_byte_workspace_put (parser, '\r');
+ else if (c == 't')
+ json_byte_workspace_put (parser, '\t');
+ else if (c == 'u')
+ {
+ int num = json_parse_unicode (parser);
+ /* is the first half of the surrogate pair */
+ if (num >= 0xd800 && num < 0xdc00)
+ {
+ parser->current_column++;
+ if (json_input_get (parser) != '\\')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ parser->current_column++;
+ if (json_input_get (parser) != 'u')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ int num2 = json_parse_unicode (parser);
+ if (num2 < 0xdc00 || num2 >= 0xe000)
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ num = (0x10000
+ + ((num - 0xd800) << 10 | (num2 - 0xdc00)));
+ }
+ else if (num >= 0xdc00 && num < 0xe000)
+ /* is the second half of the surrogate pair without
+ the first half */
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+
+ /* utf-8 encode the code-point */
+ if (num < 0x80)
+ json_byte_workspace_put (parser, num);
+ else if (num < 0x800)
+ {
+ json_byte_workspace_put (parser, 0xc0 | num >> 6);
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else if (num < 0x10000)
+ {
+ json_byte_workspace_put (parser, 0xe0 | num >> 12);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else
+ {
+ json_byte_workspace_put (parser, 0xf0 | num >> 18);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 12) & 0x3f)));
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ }
+ else
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ }
+ else
+ json_signal_error (parser, Qjson_parse_error);
+ }
+}
+
+/* If there was no integer overflow while parsing the integer, this
+ returns 'value' directly. Otherwise it calls string_to_number to
+ parse the integer from the byte workspace. This could just always
+ call string_to_number, but for performance reasons the parsing code
+ tries to calculate the value on the fly, so in most cases the call
+ to string_to_number can be avoided */
+static Lisp_Object
+json_create_integer (struct json_parser *parser,
+ bool integer_overflow, bool negative,
+ EMACS_UINT value)
+{
+ if (!integer_overflow)
+ {
+ if (negative)
+ {
+ uintmax_t v = value;
+ if (v <= (uintmax_t) INTMAX_MAX + 1)
+ return INT_TO_INTEGER ((intmax_t) -v);
+ }
+ else
+ return INT_TO_INTEGER (value);
+ }
+
+ json_byte_workspace_put (parser, 0);
+ ptrdiff_t len;
+ Lisp_Object result
+ = string_to_number ((const char *) parser->byte_workspace, 10,
+ &len);
+ if (len
+ != parser->byte_workspace_current - parser->byte_workspace - 1)
+ json_signal_error (parser, Qjson_error);
+ return result;
+}
+
+/* Parses a float using the byte workspace */
+static Lisp_Object
+json_create_float (struct json_parser *parser)
+{
+ json_byte_workspace_put (parser, 0);
+ errno = 0;
+ char *e;
+ double value = strtod ((const char *) parser->byte_workspace, &e);
+ bool out_of_range
+ = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL));
+ if (out_of_range)
+ json_signal_error (parser, Qjson_number_out_of_range);
+ else if ((const unsigned char *) e
+ != parser->byte_workspace_current - 1)
+ json_signal_error (parser, Qjson_error);
+ else
+ return make_float (value);
+}
+
+/* Parses a number. The first character is the input parameter 'c'.
+ */
+static Lisp_Object
+json_parse_number (struct json_parser *parser, int c)
+{
+ json_byte_workspace_reset (parser);
+ json_byte_workspace_put (parser, c);
+
+ bool negative = false;
+ if (c == '-')
+ {
+ negative = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+
+ /* The idea is that during finding the last character of the
+ number, the for loop below also tries to calculate the value. If
+ the parsed number is an integer which fits into unsigned long,
+ then the parser can use the value of 'integer' right away,
+ instead of having to re-parse the byte workspace later.
+ Ideally, this integer should have the same size as a CPU general
+ purpose register. */
+ EMACS_UINT integer = c - '0';
+ bool integer_overflow = false;
+
+ if (integer == 0)
+ {
+ if (json_input_at_eof (parser))
+ return INT_TO_INTEGER (0);
+ c = json_input_get (parser);
+ }
+ else
+ {
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_integer (parser, integer_overflow,
+ negative, integer);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ integer_overflow |= ckd_mul (&integer, integer, 10);
+ integer_overflow |= ckd_add (&integer, integer, c - '0');
+ }
+ }
+
+ bool is_float = false;
+ if (c == '.')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+ if (c == 'e' || c == 'E')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c == '-' || c == '+')
+ {
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+
+ /* 'c' contains a character which is not part of the number,
+ so it needs to be put back */
+ json_input_put_back (parser);
+
+ if (is_float)
+ return json_create_float (parser);
+ else
+ return json_create_integer (parser, integer_overflow, negative,
+ integer);
+}
+
+static Lisp_Object json_parse_value (struct json_parser *parser,
+ int c);
+
+/* Parses a JSON array. */
+static Lisp_Object
+json_parse_array (struct json_parser *parser)
+{
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+
+ if (c != ']')
+ {
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ size_t number_of_elements = 0;
+ /* This loop collects the array elements in the object workspace
+ */
+ for (;;)
+ {
+ Lisp_Object element = json_parse_value (parser, c);
+ json_make_object_workspace_for (parser, 1);
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, element);
+ parser->object_workspace_current++;
+
+ c = json_skip_whitespace (parser);
+
+ number_of_elements++;
+ if (c == ']')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ Lisp_Object result;
+ size_t number_of_elements
+ = parser->object_workspace_current - first;
+
+ switch (parser->conf.array_type)
+ {
+ case json_array_array:
+ result = make_vector (number_of_elements, Qnil);
+ for (size_t i = 0; i < number_of_elements; i++)
+ {
+ rarely_quit (i);
+ ASET (result, i,
+ AREF (parser->object_workspace, first + i));
+ }
+ break;
+ case json_array_list:
+ result = Qnil;
+ for (size_t i = 0; i < number_of_elements; ++i)
+ {
+ rarely_quit (i);
+ result
+ = Fcons (AREF (parser->object_workspace,
+ parser->object_workspace_current - i - 1),
+ result);
+ }
+ break;
+ default:
+ emacs_abort ();
+ }
+
+ parser->object_workspace_current = first;
+
+ return result;
+}
-static Lisp_Object ARG_NONNULL ((1))
-json_to_lisp (json_t *json, const struct json_configuration *conf)
+/* Parses a JSON object. */
+static Lisp_Object
+json_parse_object (struct json_parser *parser)
{
- switch (json_typeof (json))
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+
+ if (c != '}')
{
- case JSON_NULL:
- return conf->null_object;
- case JSON_FALSE:
- return conf->false_object;
- case JSON_TRUE:
- return Qt;
- case JSON_INTEGER:
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ /* This loop collects the object members (key/value pairs) in
+ * the object workspace */
+ for (;;)
+ {
+ if (c != '"')
+ json_signal_error (parser, Qjson_parse_error);
+
+ Lisp_Object key;
+ json_byte_workspace_reset (parser);
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
+ {
+ json_parse_string (parser);
+ key
+ = make_string_from_utf8 ((char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ break;
+ }
+ case json_object_alist:
+ {
+ json_parse_string (parser);
+ key = Fintern (make_string_from_utf8 (
+ (char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace)),
+ Qnil);
+ break;
+ }
+ case json_object_plist:
+ {
+ json_byte_workspace_put (parser, ':');
+ json_parse_string (parser);
+ key = intern_1 ((char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ break;
+ }
+ default:
+ emacs_abort ();
+ }
+
+ c = json_skip_whitespace (parser);
+ if (c != ':')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+
+ Lisp_Object value = json_parse_value (parser, c);
+
+ json_make_object_workspace_for (parser, 2);
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, key);
+ parser->object_workspace_current++;
+ ASET (parser->object_workspace,
+ parser->object_workspace_current, value);
+ parser->object_workspace_current++;
+
+ c = json_skip_whitespace (parser);
+
+ if (c == '}')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ Lisp_Object result;
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
{
- json_int_t i = json_integer_value (json);
- return INT_TO_INTEGER (i);
+ result
+ = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize,
+ make_fixed_natnum (
+ (parser->object_workspace_current - first) / 2));
+ struct Lisp_Hash_Table *h = XHASH_TABLE (result);
+ for (size_t i = first; i < parser->object_workspace_current;
+ i += 2)
+ {
+ hash_hash_t hash;
+ Lisp_Object key = AREF (parser->object_workspace, i);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i + 1);
+ ptrdiff_t i = hash_lookup_get_hash (h, key, &hash);
+ if (i < 0)
+ hash_put (h, key, value, hash);
+ else
+ set_hash_value_slot (h, i, value);
+ }
+ break;
}
- case JSON_REAL:
- return make_float (json_real_value (json));
- case JSON_STRING:
- return make_string_from_utf8 (json_string_value (json),
- json_string_length (json));
- case JSON_ARRAY:
+ case json_object_alist:
{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- size_t size = json_array_size (json);
- if (PTRDIFF_MAX < size)
- overflow_error ();
- Lisp_Object result;
- switch (conf->array_type)
- {
- case json_array_array:
- {
- result = make_vector (size, Qunbound);
- for (ptrdiff_t i = 0; i < size; ++i)
- {
- rarely_quit (i);
- ASET (result, i,
- json_to_lisp (json_array_get (json, i), conf));
- }
- break;
- }
- case json_array_list:
- {
- result = Qnil;
- for (ptrdiff_t i = size - 1; i >= 0; --i)
- {
- rarely_quit (i);
- result = Fcons (json_to_lisp (json_array_get (json, i), conf),
- result);
- }
- break;
- }
- default:
- /* Can't get here. */
- emacs_abort ();
- }
- --lisp_eval_depth;
- return result;
+ result = Qnil;
+ for (size_t i = parser->object_workspace_current; i > first;
+ i -= 2)
+ {
+ Lisp_Object key = AREF (parser->object_workspace, i - 2);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i - 1);
+ result = Fcons (Fcons (key, value), result);
+ }
+ break;
}
- case JSON_OBJECT:
+ case json_object_plist:
{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- Lisp_Object result;
- switch (conf->object_type)
- {
- case json_object_hashtable:
- {
- size_t size = json_object_size (json);
- if (FIXNUM_OVERFLOW_P (size))
- overflow_error ();
- result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize,
- make_fixed_natnum (size));
- struct Lisp_Hash_Table *h = XHASH_TABLE (result);
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- Lisp_Object key = build_string_from_utf8 (key_str);
- hash_hash_t hash;
- ptrdiff_t i = hash_lookup_get_hash (h, key, &hash);
- /* Keys in JSON objects are unique, so the key can't
- be present yet. */
- eassert (i < 0);
- hash_put (h, key, json_to_lisp (value, conf), hash);
- }
- break;
- }
- case json_object_alist:
- {
- result = Qnil;
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- Lisp_Object key
- = Fintern (build_string_from_utf8 (key_str), Qnil);
- result
- = Fcons (Fcons (key, json_to_lisp (value, conf)),
- result);
- }
- result = Fnreverse (result);
- break;
- }
- case json_object_plist:
- {
- result = Qnil;
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- USE_SAFE_ALLOCA;
- ptrdiff_t key_str_len = strlen (key_str);
- char *keyword_key_str = SAFE_ALLOCA (1 + key_str_len + 1);
- keyword_key_str[0] = ':';
- strcpy (&keyword_key_str[1], key_str);
- Lisp_Object key = intern_1 (keyword_key_str, key_str_len + 1);
- /* Build the plist as value-key since we're going to
- reverse it in the end.*/
- result = Fcons (key, result);
- result = Fcons (json_to_lisp (value, conf), result);
- SAFE_FREE ();
- }
- result = Fnreverse (result);
- break;
- }
- default:
- /* Can't get here. */
- emacs_abort ();
- }
- --lisp_eval_depth;
- return result;
+ result = Qnil;
+ for (size_t i = parser->object_workspace_current; i > first;
+ i -= 2)
+ {
+ Lisp_Object key = AREF (parser->object_workspace, i - 2);
+ Lisp_Object value
+ = AREF (parser->object_workspace, i - 1);
+ result = Fcons (value, result);
+ result = Fcons (key, result);
+ }
+ break;
}
+ default:
+ emacs_abort ();
+ }
+
+ parser->object_workspace_current = first;
+
+ return result;
+}
+
+/* Token-char is not JSON terminology. When parsing
+ null/false/true, this function defines the set of characters that
+ are considered part of a token. For example, if the input is
+ "truesomething", then the parser shouldn't treat it as "true"
+ followed by a separate "something" token. Another example:
+ if the input is "truetrue", then calling (json-parse-buffer) twice
+ shouldn't produce two successful calls that return t, but rather a
+ parsing error */
+static bool
+json_is_token_char (int c)
+{
+ return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9') || (c == '-'));
+}
+
+/* This is the entry point to the value parser; it parses a single
+ * JSON value */
+Lisp_Object
+json_parse_value (struct json_parser *parser, int c)
+{
+ if (c == '{')
+ return json_parse_object (parser);
+ else if (c == '[')
+ return json_parse_array (parser);
+ else if (c == '"')
+ {
+ json_byte_workspace_reset (parser);
+ json_parse_string (parser);
+ Lisp_Object result
+ = make_string_from_utf8 ((const char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ return result;
+ }
+ else if ((c >= '0' && c <= '9') || (c == '-'))
+ return json_parse_number (parser, c);
+ else
+ {
+ int c2 = json_input_get (parser);
+ int c3 = json_input_get (parser);
+ int c4 = json_input_get (parser);
+ int c5 = json_input_get_if_possible (parser);
+
+ if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 4;
+ return Qt;
+ }
+ if (c == 'n' && c2 == 'u' && c3 == 'l' && c4 == 'l'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 4;
+ return parser->conf.null_object;
+ }
+ if (c == 'f' && c2 == 'a' && c3 == 'l' && c4 == 's'
+ && c5 == 'e')
+ {
+ int c6 = json_input_get_if_possible (parser);
+ if (c6 < 0 || !json_is_token_char (c6))
+ {
+ if (c6 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 5;
+ return parser->conf.false_object;
+ }
+ }
+
+ json_signal_error (parser, Qjson_parse_error);
}
- /* Can't get here. */
- emacs_abort ();
+}
+
+enum ParseEndBehavior
+ {
+ PARSEENDBEHAVIOR_CheckForGarbage,
+ PARSEENDBEHAVIOR_MovePoint
+ };
+
+static Lisp_Object
+json_parse (struct json_parser *parser,
+ enum ParseEndBehavior parse_end_behavior)
+{
+ int c = json_skip_whitespace (parser);
+
+ Lisp_Object result = json_parse_value (parser, c);
+
+ switch (parse_end_behavior)
+ {
+ case PARSEENDBEHAVIOR_CheckForGarbage:
+ c = json_skip_whitespace_if_possible (parser);
+ if (c >= 0)
+ json_signal_error (parser, Qjson_trailing_content);
+ break;
+ case PARSEENDBEHAVIOR_MovePoint:
+ {
+ ptrdiff_t point
+ = (PT_BYTE + parser->input_current - parser->input_begin
+ + parser->additional_bytes_count);
+ SET_PT_BOTH (BYTE_TO_CHAR (point), point);
+ break;
+ }
+ }
+
+ return result;
}
DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
@@ -950,7 +1800,9 @@ DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
The keyword argument `:object-type' specifies which Lisp type is used
to represent objects; it can be `hash-table', `alist' or `plist'. It
-defaults to `hash-table'.
+defaults to `hash-table'. If an object has members with the same
+key, `hash-table' keeps only the last value of such keys, while
+`alist' and `plist' keep all the members.
The keyword argument `:array-type' specifies which Lisp type is used
to represent arrays; it can be `array' (the default) or `list'.
@@ -961,62 +1813,27 @@ DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
The keyword argument `:false-object' specifies which object to use to
represent a JSON false value. It defaults to `:false'.
usage: (json-parse-string STRING &rest ARGS) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+(ptrdiff_t nargs, Lisp_Object *args)
{
specpdl_ref count = SPECPDL_INDEX ();
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
Lisp_Object string = args[0];
CHECK_STRING (string);
Lisp_Object encoded = json_encode (string);
- check_string_without_embedded_nulls (encoded);
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
json_parse_args (nargs - 1, args + 1, &conf, true);
- json_error_t error;
- json_t *object
- = json_loads (SSDATA (encoded), JSON_DECODE_ANY | JSON_ALLOW_NUL, &error);
- if (object == NULL)
- json_parse_error (&error);
-
- /* Avoid leaking the object in case of further errors. */
- if (object != NULL)
- record_unwind_protect_ptr (json_release_object, object);
+ struct json_parser p;
+ const unsigned char *begin
+ = (const unsigned char *) SSDATA (encoded);
+ json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL,
+ NULL);
+ record_unwind_protect_ptr (json_parser_done, &p);
- return unbind_to (count, json_to_lisp (object, &conf));
-}
-
-struct json_read_buffer_data
-{
- /* Byte position of position to read the next chunk from. */
- ptrdiff_t point;
-};
-
-/* Callback for json_load_callback that reads from the current buffer.
- DATA must point to a structure of type json_read_buffer_data.
- data->point must point to the byte position to read from; after
- reading, data->point is advanced accordingly. The buffer point
- itself is ignored. This function may not exit nonlocally. */
-
-static size_t
-json_read_buffer_callback (void *buffer, size_t buflen, void *data)
-{
- struct json_read_buffer_data *d = data;
-
- /* First, parse from point to the gap or the end of the accessible
- portion, whatever is closer. */
- ptrdiff_t point = d->point;
- ptrdiff_t end = BUFFER_CEILING_OF (point) + 1;
- ptrdiff_t count = end - point;
- if (buflen < count)
- count = buflen;
- memcpy (buffer, BYTE_POS_ADDR (point), count);
- d->point += count;
- return count;
+ return unbind_to (count,
+ json_parse (&p,
+ PARSEENDBEHAVIOR_CheckForGarbage));
}
DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
@@ -1038,7 +1855,9 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
The keyword argument `:object-type' specifies which Lisp type is used
to represent objects; it can be `hash-table', `alist' or `plist'. It
-defaults to `hash-table'.
+defaults to `hash-table'. If an object has members with the same
+key, `hash-table' keeps only the last value of such keys, while
+`alist' and `plist' keep all the members.
The keyword argument `:array-type' specifies which Lisp type is used
to represent arrays; it can be `array' (the default) or `list'.
@@ -1049,42 +1868,33 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
The keyword argument `:false-object' specifies which object to use to
represent a JSON false value. It defaults to `:false'.
usage: (json-parse-buffer &rest args) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+(ptrdiff_t nargs, Lisp_Object *args)
{
specpdl_ref count = SPECPDL_INDEX ();
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
json_parse_args (nargs, args, &conf, true);
- ptrdiff_t point = PT_BYTE;
- struct json_read_buffer_data data = {.point = point};
- json_error_t error;
- json_t *object
- = json_load_callback (json_read_buffer_callback, &data,
- JSON_DECODE_ANY
- | JSON_DISABLE_EOF_CHECK
- | JSON_ALLOW_NUL,
- &error);
-
- if (object == NULL)
- json_parse_error (&error);
-
- /* Avoid leaking the object in case of further errors. */
- record_unwind_protect_ptr (json_release_object, object);
-
- /* Convert and then move point only if everything succeeded. */
- Lisp_Object lisp = json_to_lisp (object, &conf);
+ struct json_parser p;
+ unsigned char *begin = PT_ADDR;
+ unsigned char *end = GPT_ADDR;
+ unsigned char *secondary_begin = NULL;
+ unsigned char *secondary_end = NULL;
+ if (GPT_ADDR < Z_ADDR)
+ {
+ secondary_begin = GAP_END_ADDR;
+ if (secondary_begin < PT_ADDR)
+ secondary_begin = PT_ADDR;
+ secondary_end = Z_ADDR;
+ }
- /* Adjust point by how much we just read. */
- point += error.position;
- SET_PT_BOTH (BYTE_TO_CHAR (point), point);
+ json_parser_init (&p, conf, begin, end, secondary_begin,
+ secondary_end);
+ record_unwind_protect_ptr (json_parser_done, &p);
- return unbind_to (count, lisp);
+ return unbind_to (count,
+ json_parse (&p, PARSEENDBEHAVIOR_MovePoint));
}
void
@@ -1102,6 +1912,10 @@ syms_of_json (void)
DEFSYM (Qjson_end_of_file, "json-end-of-file");
DEFSYM (Qjson_trailing_content, "json-trailing-content");
DEFSYM (Qjson_object_too_deep, "json-object-too-deep");
+ DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error");
+ DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error");
+ DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error");
+ DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error");
DEFSYM (Qjson_unavailable, "json-unavailable");
define_error (Qjson_error, "generic JSON error", Qerror);
define_error (Qjson_out_of_memory,
@@ -1113,6 +1927,14 @@ syms_of_json (void)
Qjson_parse_error);
define_error (Qjson_object_too_deep,
"object cyclic or Lisp evaluation too deep", Qjson_error);
+ define_error (Qjson_utf8_decode_error,
+ "invalid utf-8 encoding", Qjson_error);
+ define_error (Qjson_invalid_surrogate_error,
+ "invalid surrogate pair", Qjson_error);
+ define_error (Qjson_number_out_of_range,
+ "number out of range", Qjson_error);
+ define_error (Qjson_escape_sequence_error,
+ "invalid escape sequence", Qjson_parse_error);
DEFSYM (Qpure, "pure");
DEFSYM (Qside_effect_free, "side-effect-free");
--
2.42.0
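To illustrate the duplicate-key behavior documented in the docstrings
above, the expected result shapes are roughly these (an untested
sketch, not output captured from this build):
  ;; `hash-table' keeps only the last value of a repeated key:
  (gethash "a" (json-parse-string "{\"a\": 1, \"a\": 2}"))
  ;; expected: 2
  ;; `alist' (and `plist') keep all the members:
  (json-parse-string "{\"a\": 1, \"a\": 2}" :object-type 'alist)
  ;; expected: ((a . 1) (a . 2))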
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-19 18:50 ` Herman, Géza
@ 2024-03-19 19:28 ` Eli Zaretskii
2024-03-19 19:50 ` Herman, Géza
0 siblings, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-19 19:28 UTC (permalink / raw)
To: Géza Herman; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>,
> emacs-devel@gnu.org
> Date: Tue, 19 Mar 2024 19:50:26 +0100
>
> I created this, because you previously said: "
> That's okay, we can start by making this an optional feature, and
> consider making it the default after a couple of major releases;
> meanwhile, any problems will be detected and reported."
An optional feature I had in mind was conditional compilation, so that
users should decide at build time which version they want. But the
names and signatures of the functions exposed to Lisp should be the
same in both cases, so that Lisp programmers don't need to change
their programs to cater to both alternatives.
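For example, a call such as
  (json-parse-string "{\"a\": [1, 2, 3]}"
                     :object-type 'alist :array-type 'list)
should work unchanged, and give the same result, whichever parser was
selected at build time.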
Apologies if what I wrote was not clear enough about that. For the
future, I suggest that we discuss the practical implication of
significant changes before you go ahead and invest many hours in
implementing and testing them. I would like to avoid situations where
you invest significant efforts based on misunderstandings.
Thanks.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-19 19:28 ` Eli Zaretskii
@ 2024-03-19 19:50 ` Herman, Géza
2024-03-20 3:37 ` Eli Zaretskii
0 siblings, 1 reply; 21+ messages in thread
From: Herman, Géza @ 2024-03-19 19:50 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, emacs-devel
Eli Zaretskii <eliz@gnu.org> writes:
>> From: Herman, Géza <geza.herman@gmail.com>
>> Cc: Géza Herman <geza.herman@gmail.com>,
>> emacs-devel@gnu.org
>> Date: Tue, 19 Mar 2024 19:50:26 +0100
>>
>> I created this, because you previously said: "
>> That's okay, we can start by making this an optional feature,
>> and
>> consider making it the default after a couple of major
>> releases;
>> meanwhile, any problems will be detected and reported."
>
> An optional feature I had in mind was conditional compilation,
> so that
> users should decide at build time which version they want. But
> the
> names and signatures of the functions exposed to Lisp should be
> the
> same in both cases, so that Lisp programmers don't need to
> change
> their programs to cater to both alternatives.
>
> Apologies if what I wrote was not clear enough about that. For
> the
> future, I suggest that we discuss the practical implication of
> significant changes before you go ahead and invest many hours in
> implementing and testing them. I would like to avoid situations
> where
> you invest significant efforts based on misunderstandings.
Thank you for the great attitude! No worries, it only took ~10
minutes to create that version. And even if we don't adopt that
approach, it can still be useful for validation (comparing the
parsing results of the original and the new parser) and for
benchmarking.
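For validation, a check along these lines should be enough (a rough
sketch; it assumes the -fast variant is named json-parse-string-fast,
and it uses `alist' so that `equal' can compare the results):
  ;; json-parse-string-fast: assumed name of the -fast variant
  (let ((s "{\"a\": [1, 2.5, null, true], \"b\": \"x\"}"))
    (equal (json-parse-string s :object-type 'alist)
           (json-parse-string-fast s :object-type 'alist)))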
If we want to have conditional compilation, is there a simple,
similar case of conditional compilation already in Emacs, so that I
can copy the approach easily? (Presumably I need to add a flag to
configure, which then passes a -DUSE_OWN_JSON_PARSER switch to the
compiler, etc.)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-19 19:50 ` Herman, Géza
@ 2024-03-20 3:37 ` Eli Zaretskii
2024-03-23 10:40 ` Herman, Géza
0 siblings, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-20 3:37 UTC (permalink / raw)
To: Géza Herman; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>,
> emacs-devel@gnu.org
> Date: Tue, 19 Mar 2024 20:50:48 +0100
>
>
> Eli Zaretskii <eliz@gnu.org> writes:
>
> > An optional feature I had in mind was conditional compilation,
> > so that
> > users should decide at build time which version they want. But
> > the
> > names and signatures of the functions exposed to Lisp should be
> > the
> > same in both cases, so that Lisp programmers don't need to
> > change
> > their programs to cater to both alternatives.
> >
> > Apologies if what I wrote was not clear enough about that. For
> > the
> > future, I suggest that we discuss the practical implication of
> > significant changes before you go ahead and invest many hours in
> > implementing and testing them. I would like to avoid situations
> > where
> > you invest significant efforts based on misunderstandings.
>
> Thank you for the great attitude! No worries, it only took ~10
> minutes to create that version. And even if we don't want to have
> that approach, it can still be useful for validation (comparing
> the parsing results of the original and new parser) and
> benchmarking purposes.
>
> If we want to have conditional compilation, is there a simple
> similar conditional compilation case already in Emacs? So I can
> copy the approach easily (supposedly I need to add a flag to
> configure, it needs to add a -DUSE_OWN_JSON_PARSER switch to the
> compiller, etc.)
Let's wait with this until we have a clear idea whether we need this.
My suggestion was just to present a possibility, not necessarily a
decision that we should have this. I'm not sure yet whether we need
to offer the possibility of using libjansson for parsing JSON as an
option.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-20 3:37 ` Eli Zaretskii
@ 2024-03-23 10:40 ` Herman, Géza
2024-03-23 11:11 ` Eli Zaretskii
2024-03-30 7:43 ` Eli Zaretskii
0 siblings, 2 replies; 21+ messages in thread
From: Herman, Géza @ 2024-03-23 10:40 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, emacs-devel
[-- Attachment #1: Type: text/plain, Size: 1158 bytes --]
Eli Zaretskii <eliz@gnu.org> writes:
>> If we want to have conditional compilation, is there a simple
>> similar conditional compilation case already in Emacs? So I
>> can
>> copy the approach easily (supposedly I need to add a flag to
>> configure, it needs to add a -DUSE_OWN_JSON_PARSER switch to
>> the
>> compiller, etc.)
>
> Let's wait with this until we have a clear idea whether we need
> this.
> My suggestion was just to present a possibility, not necessarily
> a
> decision that we should have this. I'm not sure yet whether we
> need
> to offer the possibility of using libjansson for parsing JSON as
> an
> option.
What is the process for deciding this?
I attached the latest version of the parser. I reverted the
Lisp-vector-based workspace to a malloc-based one, and added a note
saying that the GC must not run during parsing. I also implemented
some additional optimizations; this version runs ~10x faster on my
LSP dataset. I also ran the parser on all the JSON files I found on
my computer (~60,000 files, 1.5 GiB); no problems were found. The
parser runs ~6.5x faster (3.5 sec vs 23.4 sec) on this dataset.
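The timing itself is nothing fancy, roughly this kind of sketch (the
file name is only a placeholder):
  (with-temp-buffer
    (insert-file-contents "large.json")  ; placeholder file name
    (benchmark-run 10
      ;; json-parse-buffer parses from point and moves it,
      ;; so reset point before each run
      (progn (goto-char (point-min))
             (json-parse-buffer))))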
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-replace-jansson-parser-with-a-custom-one.patch --]
[-- Type: text/x-diff, Size: 45384 bytes --]
From f1a8a9fa010ae2ff154805a7ffef8616fb0a1eeb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A9za=20Herman?= <geza.herman@gmail.com>
Date: Wed, 6 Mar 2024 13:14:50 +0100
Subject: [PATCH] replace jansson parser with a custom one
---
src/json.c | 1364 ++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 1122 insertions(+), 242 deletions(-)
diff --git a/src/json.c b/src/json.c
index e849ccaf722..bdb9e4cdd58 100644
--- a/src/json.c
+++ b/src/json.c
@@ -23,6 +23,7 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <math.h>
#include <jansson.h>
@@ -237,41 +238,6 @@ json_out_of_memory (void)
xsignal0 (Qjson_out_of_memory);
}
-/* Signal a Lisp error corresponding to the JSON ERROR. */
-
-static AVOID
-json_parse_error (const json_error_t *error)
-{
- Lisp_Object symbol;
-#if JSON_HAS_ERROR_CODE
- switch (json_error_code (error))
- {
- case json_error_premature_end_of_input:
- symbol = Qjson_end_of_file;
- break;
- case json_error_end_of_input_expected:
- symbol = Qjson_trailing_content;
- break;
- default:
- symbol = Qjson_parse_error;
- break;
- }
-#else
- if (json_has_suffix (error->text, "expected near end of file"))
- symbol = Qjson_end_of_file;
- else if (json_has_prefix (error->text, "end of file expected"))
- symbol = Qjson_trailing_content;
- else
- symbol = Qjson_parse_error;
-#endif
- xsignal (symbol,
- list5 (build_string_from_utf8 (error->text),
- build_string_from_utf8 (error->source),
- INT_TO_INTEGER (error->line),
- INT_TO_INTEGER (error->column),
- INT_TO_INTEGER (error->position)));
-}
-
static void
json_release_object (void *object)
{
@@ -794,145 +760,1087 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
return unbind_to (count, Qnil);
}
-/* Convert a JSON object to a Lisp object. */
+#define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64
+#define JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE 512
+
+struct json_parser
+{
+ /* Because of a possible gap in the input (an Emacs buffer can have
+ a gap), the input is described by [input_begin;input_end) and
+ [secondary_input_begin;secondary_input_end). If the input is
+ contiguous, then secondary_input_begin and secondary_input_end
+ should be NULL */
+ const unsigned char *input_current;
+ const unsigned char *input_begin;
+ const unsigned char *input_end;
+
+ const unsigned char *secondary_input_begin;
+ const unsigned char *secondary_input_end;
+
+ ptrdiff_t current_line;
+ ptrdiff_t current_column;
+ ptrdiff_t point_of_current_line;
+
+ /* The parser has a maximum allowed depth. available_depth
+ decreases at each object/array begin. If it reaches zero, an
+ error is signaled */
+ int available_depth;
+
+ struct json_configuration conf;
+
+ size_t additional_bytes_count;
+
+ /* Lisp_Objects are collected in this area during object/array
+ parsing. To avoid allocations, initially
+ internal_object_workspace is used. If it runs out of space then
+ we switch to allocated space. Important note: with this design,
+ GC must not run during JSON parsing, otherwise Lisp_Objects in
+ the workspace may get incorrectly collected. */
+ Lisp_Object internal_object_workspace
+ [JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE];
+ Lisp_Object *object_workspace;
+ size_t object_workspace_size;
+ size_t object_workspace_current;
+
+ /* String and number parsing uses this workspace. The idea behind
+ internal_byte_workspace is the same as the idea behind
+ internal_object_workspace */
+ unsigned char
+ internal_byte_workspace[JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE];
+ unsigned char *byte_workspace;
+ unsigned char *byte_workspace_end;
+ unsigned char *byte_workspace_current;
+};
+
+static AVOID
+json_signal_error (struct json_parser *parser, Lisp_Object error)
+{
+ xsignal3 (error, INT_TO_INTEGER (parser->current_line),
+ INT_TO_INTEGER (parser->current_column),
+ INT_TO_INTEGER (parser->point_of_current_line
+ + parser->current_column));
+}
+
+static void
+json_parser_init (struct json_parser *parser,
+ struct json_configuration conf,
+ const unsigned char *input,
+ const unsigned char *input_end,
+ const unsigned char *secondary_input,
+ const unsigned char *secondary_input_end)
+{
+ if (secondary_input >= secondary_input_end)
+ {
+ secondary_input = NULL;
+ secondary_input_end = NULL;
+ }
+
+ if (input < input_end)
+ {
+ parser->input_begin = input;
+ parser->input_end = input_end;
+
+ parser->secondary_input_begin = secondary_input;
+ parser->secondary_input_end = secondary_input_end;
+ }
+ else
+ {
+ parser->input_begin = secondary_input;
+ parser->input_end = secondary_input_end;
+
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ }
+
+ parser->input_current = parser->input_begin;
+
+ parser->current_line = 1;
+ parser->current_column = 0;
+ parser->point_of_current_line = 0;
+ parser->available_depth = 10000;
+ parser->conf = conf;
+
+ parser->additional_bytes_count = 0;
+
+ parser->object_workspace = parser->internal_object_workspace;
+ parser->object_workspace_size
+ = JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE;
+ parser->object_workspace_current = 0;
+
+ parser->byte_workspace = parser->internal_byte_workspace;
+ parser->byte_workspace_end
+ = (parser->byte_workspace
+ + JSON_PARSER_INTERNAL_BYTE_WORKSPACE_SIZE);
+}
+
+static void
+json_parser_done (void *parser)
+{
+ struct json_parser *p = (struct json_parser *) parser;
+ if (p->object_workspace != p->internal_object_workspace)
+ xfree (p->object_workspace);
+ if (p->byte_workspace != p->internal_byte_workspace)
+ xfree (p->byte_workspace);
+}
+
+/* Makes sure that the object_workspace has room for 'size' more
+ Lisp_Objects */
+NO_INLINE static void
+json_make_object_workspace_for_slow_path (struct json_parser *parser,
+ size_t size)
+{
+ size_t needed_workspace_size
+ = (parser->object_workspace_current + size);
+ size_t new_workspace_size = parser->object_workspace_size;
+ while (new_workspace_size < needed_workspace_size)
+ {
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+ }
+
+ Lisp_Object *new_workspace_ptr;
+ if (parser->object_workspace_size
+ == JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE)
+ {
+ new_workspace_ptr
+ = xnmalloc (new_workspace_size, sizeof (Lisp_Object));
+ memcpy (new_workspace_ptr, parser->object_workspace,
+ (sizeof (Lisp_Object)
+ * parser->object_workspace_current));
+ }
+ else
+ {
+ new_workspace_ptr
+ = xnrealloc (parser->object_workspace, new_workspace_size,
+ sizeof (Lisp_Object));
+ }
+
+ parser->object_workspace = new_workspace_ptr;
+ parser->object_workspace_size = new_workspace_size;
+}
+
+INLINE void
+json_make_object_workspace_for (struct json_parser *parser,
+ size_t size)
+{
+ if (parser->object_workspace_size - parser->object_workspace_current
+ < size)
+ {
+ json_make_object_workspace_for_slow_path (parser, size);
+ }
+}
+
+static void
+json_byte_workspace_reset (struct json_parser *parser)
+{
+ parser->byte_workspace_current = parser->byte_workspace;
+}
+
+/* Puts 'value' into the byte_workspace. If there is no space
+ available, it allocates space */
+NO_INLINE static void
+json_byte_workspace_put_slow_path (struct json_parser *parser,
+ unsigned char value)
+{
+ size_t new_workspace_size
+ = parser->byte_workspace_end - parser->byte_workspace;
+ if (ckd_mul (&new_workspace_size, new_workspace_size, 2))
+ {
+ json_signal_error (parser, Qjson_out_of_memory);
+ }
+
+ size_t offset
+ = parser->byte_workspace_current - parser->byte_workspace;
+
+ if (parser->byte_workspace == parser->internal_byte_workspace)
+ {
+ parser->byte_workspace = xmalloc (new_workspace_size);
+ memcpy (parser->byte_workspace, parser->internal_byte_workspace,
+ offset);
+ }
+ else
+ {
+ parser->byte_workspace
+ = xrealloc (parser->byte_workspace, new_workspace_size);
+ }
+ parser->byte_workspace_end
+ = parser->byte_workspace + new_workspace_size;
+ parser->byte_workspace_current = parser->byte_workspace + offset;
+ *parser->byte_workspace_current++ = value;
+}
+
+INLINE void
+json_byte_workspace_put (struct json_parser *parser,
+ unsigned char value)
+{
+ if (parser->byte_workspace_current < parser->byte_workspace_end)
+ {
+ *parser->byte_workspace_current++ = value;
+ }
+ else
+ {
+ json_byte_workspace_put_slow_path (parser, value);
+ }
+}
+
+static bool
+json_input_at_eof (struct json_parser *parser)
+{
+ if (parser->input_current < parser->input_end)
+ return false;
+ return parser->secondary_input_end == NULL;
+}
+
+/* If there is a secondary buffer, this switches to it */
+static int
+json_input_switch_to_secondary (struct json_parser *parser)
+{
+ if (parser->secondary_input_begin < parser->secondary_input_end)
+ {
+ parser->additional_bytes_count
+ = parser->input_end - parser->input_begin;
+ parser->input_begin = parser->secondary_input_begin;
+ parser->input_end = parser->secondary_input_end;
+ parser->input_current = parser->secondary_input_begin;
+ parser->secondary_input_begin = NULL;
+ parser->secondary_input_end = NULL;
+ return 0;
+ }
+ else
+ return -1;
+}
+
+/* Reads a byte from the JSON input stream */
+NO_INLINE static unsigned char
+json_input_get_slow_path (struct json_parser *parser)
+{
+ if (json_input_switch_to_secondary (parser) < 0)
+ json_signal_error (parser, Qjson_end_of_file);
+ return *parser->input_current++;
+}
+
+static unsigned char
+json_input_get (struct json_parser *parser)
+{
+ if (parser->input_current < parser->input_end)
+ return *parser->input_current++;
+ return json_input_get_slow_path (parser);
+}
+
+/* Reads a byte from the JSON input stream, if the stream is not at
+ * eof. At eof, returns -1 */
+static int
+json_input_get_if_possible (struct json_parser *parser)
+{
+ if (parser->input_current >= parser->input_end
+ && json_input_switch_to_secondary (parser) < 0)
+ return -1;
+ return *parser->input_current++;
+}
+
+/* Puts back the last input byte that was read. Only one byte can be
+ put back, because otherwise this code would need to handle
+ switching from the secondary buffer back to the initial one */
+static void
+json_input_put_back (struct json_parser *parser)
+{
+ parser->input_current--;
+}
+
+static bool
+json_skip_whitespace_internal (struct json_parser *parser, int c)
+{
+ parser->current_column++;
+ if (c == 0x20 || c == 0x09 || c == 0x0d)
+ return false;
+ else if (c == 0x0a)
+ {
+ parser->current_line++;
+ parser->point_of_current_line += parser->current_column;
+ parser->current_column = 0;
+ return false;
+ }
+ else
+ return true;
+}
+
+/* Skips JSON whitespace, and returns the first non-whitespace
+ * character */
+static int
+json_skip_whitespace (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get (parser);
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+/* Skips JSON whitespace, and returns the first non-whitespace
+ * character, if there is one. If there is no non-whitespace
+ * character (because we reached the end of input), it returns -1 */
+static int
+json_skip_whitespace_if_possible (struct json_parser *parser)
+{
+ for (;;)
+ {
+ int c = json_input_get_if_possible (parser);
+ if (c < 0)
+ return c;
+ if (json_skip_whitespace_internal (parser, c))
+ return c;
+ }
+}
+
+static int
+json_hex_value (int c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ else if (c >= 'A' && c <= 'F')
+ return c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ else
+ return -1;
+}
+
+/* Parses the CCCC part of the unicode escape sequence \uCCCC */
+static int
+json_parse_unicode (struct json_parser *parser)
+{
+ unsigned char v[4];
+ for (int i = 0; i < 4; i++)
+ {
+ int c = json_hex_value (json_input_get (parser));
+ parser->current_column++;
+ if (c < 0)
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ v[i] = c;
+ }
+
+ return v[0] << 12 | v[1] << 8 | v[2] << 4 | v[3];
+}
+
+/* Parses the tail bytes of a UTF-8 code-point encoding (the first
+ byte has already been read), and returns the value contributed by
+ those tail bytes */
+static int
+json_handle_utf8_tail_bytes (struct json_parser *parser, int n)
+{
+ int v = 0;
+ for (int i = 0; i < n; i++)
+ {
+ int c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ if ((c & 0xc0) != 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ v = (v << 6) | (c & 0x3f);
+ }
+ return v;
+}
+
+/* Reads a JSON string, and puts the result into the byte workspace */
+static void
+json_parse_string (struct json_parser *parser)
+{
+ /* a single_uninteresting byte can simply be copied from the input
+ to the output; it doesn't need any extra care. This covers all
+ the characters in [0x20;0x7f], except the double quote and
+ the backslash */
+ static const char is_single_uninteresting[256] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 2 */ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 3 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 4 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 5 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+ /* 6 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 7 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+
+ for (;;)
+ {
+ /* This if is only here for a possible speedup. If there are 4
+ bytes available, and all of them are single_uninteresting,
+ then we can just copy these 4 bytes to output */
+ if (parser->input_end - parser->input_current >= 4)
+ {
+ int c0 = parser->input_current[0];
+ int c1 = parser->input_current[1];
+ int c2 = parser->input_current[2];
+ int c3 = parser->input_current[3];
+ bool v0 = is_single_uninteresting[c0];
+ bool v1 = is_single_uninteresting[c1];
+ bool v2 = is_single_uninteresting[c2];
+ bool v3 = is_single_uninteresting[c3];
+ if (v0 && v1 && v2 && v3)
+ {
+ json_byte_workspace_put (parser, c0);
+ json_byte_workspace_put (parser, c1);
+ json_byte_workspace_put (parser, c2);
+ json_byte_workspace_put (parser, c3);
+ parser->input_current += 4;
+ parser->current_column += 4;
+ continue;
+ }
+ }
+
+ int c = json_input_get (parser);
+ parser->current_column++;
+ if (is_single_uninteresting[c])
+ {
+ json_byte_workspace_put (parser, c);
+ continue;
+ }
+
+ if (c == '"')
+ return;
+ else if (c & 0x80)
+ {
+ /* Handle utf-8 encoding */
+ json_byte_workspace_put (parser, c);
+ if (c < 0xc0)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ else if (c < 0xe0)
+ {
+ int n = ((c & 0x1f) << 6
+ | json_handle_utf8_tail_bytes (parser, 1));
+ if (n < 0x80)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf0)
+ {
+ int n = ((c & 0xf) << 12
+ | json_handle_utf8_tail_bytes (parser, 2));
+ if (n < 0x800 || (n >= 0xd800 && n < 0xe000))
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c < 0xf8)
+ {
+ int n = ((c & 0x7) << 18
+ | json_handle_utf8_tail_bytes (parser, 3));
+ if (n < 0x10000 || n > 0x10ffff)
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else
+ json_signal_error (parser, Qjson_utf8_decode_error);
+ }
+ else if (c == '\\')
+ {
+ /* Handle escape sequences */
+ c = json_input_get (parser);
+ parser->current_column++;
+ if (c == '"')
+ json_byte_workspace_put (parser, '"');
+ else if (c == '\\')
+ json_byte_workspace_put (parser, '\\');
+ else if (c == '/')
+ json_byte_workspace_put (parser, '/');
+ else if (c == 'b')
+ json_byte_workspace_put (parser, '\b');
+ else if (c == 'f')
+ json_byte_workspace_put (parser, '\f');
+ else if (c == 'n')
+ json_byte_workspace_put (parser, '\n');
+ else if (c == 'r')
+ json_byte_workspace_put (parser, '\r');
+ else if (c == 't')
+ json_byte_workspace_put (parser, '\t');
+ else if (c == 'u')
+ {
+ int num = json_parse_unicode (parser);
+ /* is the first half of the surrogate pair */
+ if (num >= 0xd800 && num < 0xdc00)
+ {
+ parser->current_column++;
+ if (json_input_get (parser) != '\\')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ parser->current_column++;
+ if (json_input_get (parser) != 'u')
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ int num2 = json_parse_unicode (parser);
+ if (num2 < 0xdc00 || num2 >= 0xe000)
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+ num = (0x10000
+ + ((num - 0xd800) << 10 | (num2 - 0xdc00)));
+ }
+ else if (num >= 0xdc00 && num < 0xe000)
+ /* is the second half of the surrogate pair without
+ the first half */
+ json_signal_error (parser,
+ Qjson_invalid_surrogate_error);
+
+ /* utf-8 encode the code-point */
+ if (num < 0x80)
+ json_byte_workspace_put (parser, num);
+ else if (num < 0x800)
+ {
+ json_byte_workspace_put (parser, 0xc0 | num >> 6);
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else if (num < 0x10000)
+ {
+ json_byte_workspace_put (parser, 0xe0 | num >> 12);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ else
+ {
+ json_byte_workspace_put (parser, 0xf0 | num >> 18);
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 12) & 0x3f)));
+ json_byte_workspace_put (parser,
+ (0x80
+ | ((num >> 6) & 0x3f)));
+ json_byte_workspace_put (parser,
+ 0x80 | (num & 0x3f));
+ }
+ }
+ else
+ json_signal_error (parser, Qjson_escape_sequence_error);
+ }
+ else
+ json_signal_error (parser, Qjson_parse_error);
+ }
+}
+
+/* If there was no integer overflow while parsing the integer, this
+ converts 'value' to a Lisp integer directly. Otherwise it calls
+ string_to_number to parse the integer from the byte workspace.
+ The code could always call string_to_number, but for performance
+ reasons it tries to calculate the value during parsing, so in most
+ cases the call to string_to_number can be avoided */
+static Lisp_Object
+json_create_integer (struct json_parser *parser,
+ bool integer_overflow, bool negative,
+ EMACS_UINT value)
+{
+ if (!integer_overflow)
+ {
+ if (negative)
+ {
+ uintmax_t v = value;
+ if (v <= (uintmax_t) INTMAX_MAX + 1)
+ return INT_TO_INTEGER ((intmax_t) -v);
+ }
+ else
+ return INT_TO_INTEGER (value);
+ }
+
+ json_byte_workspace_put (parser, 0);
+ ptrdiff_t len;
+ Lisp_Object result
+ = string_to_number ((const char *) parser->byte_workspace, 10,
+ &len);
+ if (len
+ != parser->byte_workspace_current - parser->byte_workspace - 1)
+ json_signal_error (parser, Qjson_error);
+ return result;
+}
+
+/* Parses a float using the byte workspace */
+static Lisp_Object
+json_create_float (struct json_parser *parser)
+{
+ json_byte_workspace_put (parser, 0);
+ errno = 0;
+ char *e;
+ double value = strtod ((const char *) parser->byte_workspace, &e);
+ bool out_of_range
+ = (errno != 0 && (value == HUGE_VAL || value == -HUGE_VAL));
+ if (out_of_range)
+ json_signal_error (parser, Qjson_number_out_of_range);
+ else if ((const unsigned char *) e
+ != parser->byte_workspace_current - 1)
+ json_signal_error (parser, Qjson_error);
+ else
+ return make_float (value);
+}
+
+/* Parses a number. The first character is the input parameter 'c'.
+ */
+static Lisp_Object
+json_parse_number (struct json_parser *parser, int c)
+{
+ json_byte_workspace_reset (parser);
+ json_byte_workspace_put (parser, c);
+
+ bool negative = false;
+ if (c == '-')
+ {
+ negative = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+
+ /* The idea is that while scanning for the last character of the
+ number, the for loop below also tries to calculate its value. If
+ the parsed number is an integer that fits into an EMACS_UINT,
+ then the parser can use the value of 'integer' right away,
+ instead of having to re-parse the byte workspace later.
+ Ideally, this integer should have the same size as a CPU general
+ purpose register. */
+ EMACS_UINT integer = c - '0';
+ bool integer_overflow = false;
+
+ if (integer == 0)
+ {
+ if (json_input_at_eof (parser))
+ return INT_TO_INTEGER (0);
+ c = json_input_get (parser);
+ }
+ else
+ {
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_integer (parser, integer_overflow,
+ negative, integer);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ integer_overflow |= ckd_mul (&integer, integer, 10);
+ integer_overflow |= ckd_add (&integer, integer, c - '0');
+ }
+ }
+
+ bool is_float = false;
+ if (c == '.')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+ if (c == 'e' || c == 'E')
+ {
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+
+ is_float = true;
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ if (c == '-' || c == '+')
+ {
+ c = json_input_get (parser);
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ if (c < '0' || c > '9')
+ json_signal_error (parser, Qjson_parse_error);
+ for (;;)
+ {
+ if (json_input_at_eof (parser))
+ return json_create_float (parser);
+ c = json_input_get (parser);
+ if (c < '0' || c > '9')
+ break;
+ json_byte_workspace_put (parser, c);
+ parser->current_column++;
+ }
+ }
+
+ /* 'c' contains a character which is not part of the number,
+ so it needs to be put back */
+ json_input_put_back (parser);
+
+ if (is_float)
+ return json_create_float (parser);
+ else
+ return json_create_integer (parser, integer_overflow, negative,
+ integer);
+}
+
+static Lisp_Object json_parse_value (struct json_parser *parser,
+ int c);
-static Lisp_Object ARG_NONNULL ((1))
-json_to_lisp (json_t *json, const struct json_configuration *conf)
+/* Parses a JSON array. */
+static Lisp_Object
+json_parse_array (struct json_parser *parser)
{
- switch (json_typeof (json))
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+ Lisp_Object result = Qnil;
+
+ if (c != ']')
+ {
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ size_t number_of_elements = 0;
+ Lisp_Object *cdr = &result;
+ /* This loop parses the array elements, collecting them in the
+ object workspace or directly in a Lisp list */
+ for (;;)
+ {
+ Lisp_Object element = json_parse_value (parser, c);
+ switch (parser->conf.array_type)
+ {
+ case json_array_array:
+ json_make_object_workspace_for (parser, 1);
+ parser->object_workspace[parser->object_workspace_current]
+ = element;
+ parser->object_workspace_current++;
+ break;
+ case json_array_list:
+ {
+ Lisp_Object nc = Fcons (element, Qnil);
+ *cdr = nc;
+ cdr = xcdr_addr (nc);
+ break;
+ }
+ default:
+ emacs_abort ();
+ }
+
+ c = json_skip_whitespace (parser);
+
+ number_of_elements++;
+ if (c == ']')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ switch (parser->conf.array_type)
{
- case JSON_NULL:
- return conf->null_object;
- case JSON_FALSE:
- return conf->false_object;
- case JSON_TRUE:
- return Qt;
- case JSON_INTEGER:
+ case json_array_array:
{
- json_int_t i = json_integer_value (json);
- return INT_TO_INTEGER (i);
+ size_t number_of_elements
+ = parser->object_workspace_current - first;
+ result = make_vector (number_of_elements, Qnil);
+ for (size_t i = 0; i < number_of_elements; i++)
+ {
+ rarely_quit (i);
+ ASET (result, i, parser->object_workspace[first + i]);
+ }
+ parser->object_workspace_current = first;
+ break;
}
- case JSON_REAL:
- return make_float (json_real_value (json));
- case JSON_STRING:
- return make_string_from_utf8 (json_string_value (json),
- json_string_length (json));
- case JSON_ARRAY:
+ case json_array_list:
+ break;
+ default:
+ emacs_abort ();
+ }
+
+ return result;
+}
+
+/* Parses the ": value" part of a JSON object member. */
+static Lisp_Object
+json_parse_object_member_value (struct json_parser *parser)
+{
+ int c = json_skip_whitespace (parser);
+ if (c != ':')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+
+ return json_parse_value (parser, c);
+}
+
+/* Parses a JSON object. */
+static Lisp_Object
+json_parse_object (struct json_parser *parser)
+{
+ int c = json_skip_whitespace (parser);
+
+ const size_t first = parser->object_workspace_current;
+ Lisp_Object result = Qnil;
+
+ if (c != '}')
+ {
+ parser->available_depth--;
+ if (parser->available_depth < 0)
+ json_signal_error (parser, Qjson_object_too_deep);
+
+ Lisp_Object *cdr = &result;
+
+ /* This loop parses the object members (key/value pairs),
+ * collecting them in the object workspace or in a Lisp list */
+ for (;;)
+ {
+ if (c != '"')
+ json_signal_error (parser, Qjson_parse_error);
+
+ json_byte_workspace_reset (parser);
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
+ {
+ json_parse_string (parser);
+ Lisp_Object key
+ = make_string_from_utf8 ((char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ Lisp_Object value
+ = json_parse_object_member_value (parser);
+ json_make_object_workspace_for (parser, 2);
+ parser->object_workspace[parser->object_workspace_current]
+ = key;
+ parser->object_workspace_current++;
+ parser->object_workspace[parser->object_workspace_current]
+ = value;
+ parser->object_workspace_current++;
+ break;
+ }
+ case json_object_alist:
+ {
+ json_parse_string (parser);
+ Lisp_Object key
+ = Fintern (make_string_from_utf8 (
+ (char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace)),
+ Qnil);
+ Lisp_Object value
+ = json_parse_object_member_value (parser);
+ Lisp_Object nc = Fcons (Fcons (key, value), Qnil);
+ *cdr = nc;
+ cdr = xcdr_addr (nc);
+ break;
+ }
+ case json_object_plist:
+ {
+ json_byte_workspace_put (parser, ':');
+ json_parse_string (parser);
+ Lisp_Object key
+ = intern_1 ((char *) parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ Lisp_Object value
+ = json_parse_object_member_value (parser);
+ Lisp_Object nc = Fcons (key, Qnil);
+ *cdr = nc;
+ cdr = xcdr_addr (nc);
+
+ nc = Fcons (value, Qnil);
+ *cdr = nc;
+ cdr = xcdr_addr (nc);
+ break;
+ }
+ default:
+ emacs_abort ();
+ }
+
+ c = json_skip_whitespace (parser);
+
+ if (c == '}')
+ {
+ parser->available_depth++;
+ break;
+ }
+
+ if (c != ',')
+ json_signal_error (parser, Qjson_parse_error);
+
+ c = json_skip_whitespace (parser);
+ }
+ }
+
+ switch (parser->conf.object_type)
+ {
+ case json_object_hashtable:
{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- size_t size = json_array_size (json);
- if (PTRDIFF_MAX < size)
- overflow_error ();
- Lisp_Object result;
- switch (conf->array_type)
- {
- case json_array_array:
- {
- result = make_vector (size, Qunbound);
- for (ptrdiff_t i = 0; i < size; ++i)
- {
- rarely_quit (i);
- ASET (result, i,
- json_to_lisp (json_array_get (json, i), conf));
- }
- break;
- }
- case json_array_list:
- {
- result = Qnil;
- for (ptrdiff_t i = size - 1; i >= 0; --i)
- {
- rarely_quit (i);
- result = Fcons (json_to_lisp (json_array_get (json, i), conf),
- result);
- }
- break;
- }
- default:
- /* Can't get here. */
- emacs_abort ();
- }
- --lisp_eval_depth;
- return result;
+ result
+ = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize,
+ make_fixed_natnum (
+ (parser->object_workspace_current - first) / 2));
+ struct Lisp_Hash_Table *h = XHASH_TABLE (result);
+ for (size_t i = first; i < parser->object_workspace_current;
+ i += 2)
+ {
+ hash_hash_t hash;
+ Lisp_Object key = parser->object_workspace[i];
+ Lisp_Object value = parser->object_workspace[i + 1];
+ ptrdiff_t i = hash_lookup_get_hash (h, key, &hash);
+ if (i < 0)
+ hash_put (h, key, value, hash);
+ else
+ set_hash_value_slot (h, i, value);
+ }
+ parser->object_workspace_current = first;
+ break;
}
- case JSON_OBJECT:
+ case json_object_alist:
+ case json_object_plist:
+ break;
+ default:
+ emacs_abort ();
+ }
+
+ return result;
+}
+
+/* Token-char is not JSON terminology. When parsing
+ null/false/true, this function defines the set of characters that
+ are considered part of a token. For example, if the input is
+ "truesomething", then the parser shouldn't treat it as "true"
+ followed by a separate "something" token. Another example:
+ if the input is "truetrue", then calling (json-parse-buffer) twice
+ shouldn't produce two successful calls that return t, but rather a
+ parsing error */
+static bool
+json_is_token_char (int c)
+{
+ return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9') || (c == '-'));
+}
+
+/* This is the entry point to the value parser; it parses a single
+ * JSON value */
+Lisp_Object
+json_parse_value (struct json_parser *parser, int c)
+{
+ if (c == '{')
+ return json_parse_object (parser);
+ else if (c == '[')
+ return json_parse_array (parser);
+ else if (c == '"')
+ {
+ json_byte_workspace_reset (parser);
+ json_parse_string (parser);
+ Lisp_Object result
+ = make_string_from_utf8 ((const char *)
+ parser->byte_workspace,
+ (parser->byte_workspace_current
+ - parser->byte_workspace));
+ return result;
+ }
+ else if ((c >= '0' && c <= '9') || (c == '-'))
+ return json_parse_number (parser, c);
+ else
+ {
+ int c2 = json_input_get (parser);
+ int c3 = json_input_get (parser);
+ int c4 = json_input_get (parser);
+ int c5 = json_input_get_if_possible (parser);
+
+ if (c == 't' && c2 == 'r' && c3 == 'u' && c4 == 'e'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 3;
+ return Qt;
+ }
+ if (c == 'n' && c2 == 'u' && c3 == 'l' && c4 == 'l'
+ && (c5 < 0 || !json_is_token_char (c5)))
+ {
+ if (c5 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 3;
+ return parser->conf.null_object;
+ }
+ if (c == 'f' && c2 == 'a' && c3 == 'l' && c4 == 's'
+ && c5 == 'e')
+ {
+ int c6 = json_input_get_if_possible (parser);
+ if (c6 < 0 || !json_is_token_char (c6))
+ {
+ if (c6 >= 0)
+ json_input_put_back (parser);
+ parser->current_column += 4;
+ return parser->conf.false_object;
+ }
+ }
+
+ json_signal_error (parser, Qjson_parse_error);
+ }
+}
+
+enum ParseEndBehavior
+ {
+ PARSEENDBEHAVIOR_CheckForGarbage,
+ PARSEENDBEHAVIOR_MovePoint
+ };
+
+static Lisp_Object
+json_parse (struct json_parser *parser,
+ enum ParseEndBehavior parse_end_behavior)
+{
+ int c = json_skip_whitespace (parser);
+
+ Lisp_Object result = json_parse_value (parser, c);
+
+ switch (parse_end_behavior)
+ {
+ case PARSEENDBEHAVIOR_CheckForGarbage:
+ c = json_skip_whitespace_if_possible (parser);
+ if (c >= 0)
+ json_signal_error (parser, Qjson_trailing_content);
+ break;
+ case PARSEENDBEHAVIOR_MovePoint:
{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- Lisp_Object result;
- switch (conf->object_type)
- {
- case json_object_hashtable:
- {
- size_t size = json_object_size (json);
- if (FIXNUM_OVERFLOW_P (size))
- overflow_error ();
- result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize,
- make_fixed_natnum (size));
- struct Lisp_Hash_Table *h = XHASH_TABLE (result);
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- Lisp_Object key = build_string_from_utf8 (key_str);
- hash_hash_t hash;
- ptrdiff_t i = hash_lookup_get_hash (h, key, &hash);
- /* Keys in JSON objects are unique, so the key can't
- be present yet. */
- eassert (i < 0);
- hash_put (h, key, json_to_lisp (value, conf), hash);
- }
- break;
- }
- case json_object_alist:
- {
- result = Qnil;
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- Lisp_Object key
- = Fintern (build_string_from_utf8 (key_str), Qnil);
- result
- = Fcons (Fcons (key, json_to_lisp (value, conf)),
- result);
- }
- result = Fnreverse (result);
- break;
- }
- case json_object_plist:
- {
- result = Qnil;
- const char *key_str;
- json_t *value;
- json_object_foreach (json, key_str, value)
- {
- USE_SAFE_ALLOCA;
- ptrdiff_t key_str_len = strlen (key_str);
- char *keyword_key_str = SAFE_ALLOCA (1 + key_str_len + 1);
- keyword_key_str[0] = ':';
- strcpy (&keyword_key_str[1], key_str);
- Lisp_Object key = intern_1 (keyword_key_str, key_str_len + 1);
- /* Build the plist as value-key since we're going to
- reverse it in the end.*/
- result = Fcons (key, result);
- result = Fcons (json_to_lisp (value, conf), result);
- SAFE_FREE ();
- }
- result = Fnreverse (result);
- break;
- }
- default:
- /* Can't get here. */
- emacs_abort ();
- }
- --lisp_eval_depth;
- return result;
+ ptrdiff_t byte
+ = (PT_BYTE + parser->input_current - parser->input_begin
+ + parser->additional_bytes_count);
+ ptrdiff_t position;
+ if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
+ position = byte;
+ else
+ position
+ = PT + parser->point_of_current_line + parser->current_column;
+
+ SET_PT_BOTH (position, byte);
+ break;
}
}
- /* Can't get here. */
- emacs_abort ();
+
+ return result;
}
DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
@@ -950,7 +1858,9 @@ DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
The keyword argument `:object-type' specifies which Lisp type is used
to represent objects; it can be `hash-table', `alist' or `plist'. It
-defaults to `hash-table'.
+defaults to `hash-table'. If an object has members with the same
+key, `hash-table' keeps only the last value of such keys, while
+`alist' and `plist' keep all the members.
The keyword argument `:array-type' specifies which Lisp type is used
to represent arrays; it can be `array' (the default) or `list'.
@@ -961,62 +1871,27 @@ DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, MANY,
The keyword argument `:false-object' specifies which object to use to
represent a JSON false value. It defaults to `:false'.
usage: (json-parse-string STRING &rest ARGS) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+(ptrdiff_t nargs, Lisp_Object *args)
{
specpdl_ref count = SPECPDL_INDEX ();
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
Lisp_Object string = args[0];
CHECK_STRING (string);
Lisp_Object encoded = json_encode (string);
- check_string_without_embedded_nulls (encoded);
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
json_parse_args (nargs - 1, args + 1, &conf, true);
- json_error_t error;
- json_t *object
- = json_loads (SSDATA (encoded), JSON_DECODE_ANY | JSON_ALLOW_NUL, &error);
- if (object == NULL)
- json_parse_error (&error);
+ struct json_parser p;
+ const unsigned char *begin
+ = (const unsigned char *) SSDATA (encoded);
+ json_parser_init (&p, conf, begin, begin + SBYTES (encoded), NULL,
+ NULL);
+ record_unwind_protect_ptr (json_parser_done, &p);
- /* Avoid leaking the object in case of further errors. */
- if (object != NULL)
- record_unwind_protect_ptr (json_release_object, object);
-
- return unbind_to (count, json_to_lisp (object, &conf));
-}
-
-struct json_read_buffer_data
-{
- /* Byte position of position to read the next chunk from. */
- ptrdiff_t point;
-};
-
-/* Callback for json_load_callback that reads from the current buffer.
- DATA must point to a structure of type json_read_buffer_data.
- data->point must point to the byte position to read from; after
- reading, data->point is advanced accordingly. The buffer point
- itself is ignored. This function may not exit nonlocally. */
-
-static size_t
-json_read_buffer_callback (void *buffer, size_t buflen, void *data)
-{
- struct json_read_buffer_data *d = data;
-
- /* First, parse from point to the gap or the end of the accessible
- portion, whatever is closer. */
- ptrdiff_t point = d->point;
- ptrdiff_t end = BUFFER_CEILING_OF (point) + 1;
- ptrdiff_t count = end - point;
- if (buflen < count)
- count = buflen;
- memcpy (buffer, BYTE_POS_ADDR (point), count);
- d->point += count;
- return count;
+ return unbind_to (count,
+ json_parse (&p,
+ PARSEENDBEHAVIOR_CheckForGarbage));
}
DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
@@ -1038,7 +1913,9 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
The keyword argument `:object-type' specifies which Lisp type is used
to represent objects; it can be `hash-table', `alist' or `plist'. It
-defaults to `hash-table'.
+defaults to `hash-table'. If an object has members with the same
+key, `hash-table' keeps only the last value of such keys, while
+`alist' and `plist' keep all the members.
The keyword argument `:array-type' specifies which Lisp type is used
to represent arrays; it can be `array' (the default) or `list'.
@@ -1049,42 +1926,33 @@ DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer,
The keyword argument `:false-object' specifies which object to use to
represent a JSON false value. It defaults to `:false'.
usage: (json-parse-buffer &rest args) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+(ptrdiff_t nargs, Lisp_Object *args)
{
specpdl_ref count = SPECPDL_INDEX ();
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
+ struct json_configuration conf
+ = { json_object_hashtable, json_array_array, QCnull, QCfalse };
json_parse_args (nargs, args, &conf, true);
- ptrdiff_t point = PT_BYTE;
- struct json_read_buffer_data data = {.point = point};
- json_error_t error;
- json_t *object
- = json_load_callback (json_read_buffer_callback, &data,
- JSON_DECODE_ANY
- | JSON_DISABLE_EOF_CHECK
- | JSON_ALLOW_NUL,
- &error);
-
- if (object == NULL)
- json_parse_error (&error);
-
- /* Avoid leaking the object in case of further errors. */
- record_unwind_protect_ptr (json_release_object, object);
-
- /* Convert and then move point only if everything succeeded. */
- Lisp_Object lisp = json_to_lisp (object, &conf);
+ struct json_parser p;
+ unsigned char *begin = PT_ADDR;
+ unsigned char *end = GPT_ADDR;
+ unsigned char *secondary_begin = NULL;
+ unsigned char *secondary_end = NULL;
+ if (GPT_ADDR < Z_ADDR)
+ {
+ secondary_begin = GAP_END_ADDR;
+ if (secondary_begin < PT_ADDR)
+ secondary_begin = PT_ADDR;
+ secondary_end = Z_ADDR;
+ }
- /* Adjust point by how much we just read. */
- point += error.position;
- SET_PT_BOTH (BYTE_TO_CHAR (point), point);
+ json_parser_init (&p, conf, begin, end, secondary_begin,
+ secondary_end);
+ record_unwind_protect_ptr (json_parser_done, &p);
- return unbind_to (count, lisp);
+ return unbind_to (count,
+ json_parse (&p, PARSEENDBEHAVIOR_MovePoint));
}
void
@@ -1102,6 +1970,10 @@ syms_of_json (void)
DEFSYM (Qjson_end_of_file, "json-end-of-file");
DEFSYM (Qjson_trailing_content, "json-trailing-content");
DEFSYM (Qjson_object_too_deep, "json-object-too-deep");
+ DEFSYM (Qjson_utf8_decode_error, "json-utf8-decode-error");
+ DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error");
+ DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error");
+ DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error");
DEFSYM (Qjson_unavailable, "json-unavailable");
define_error (Qjson_error, "generic JSON error", Qerror);
define_error (Qjson_out_of_memory,
@@ -1113,6 +1985,14 @@ syms_of_json (void)
Qjson_parse_error);
define_error (Qjson_object_too_deep,
"object cyclic or Lisp evaluation too deep", Qjson_error);
+ define_error (Qjson_utf8_decode_error,
+ "invalid utf-8 encoding", Qjson_error);
+ define_error (Qjson_invalid_surrogate_error,
+ "invalid surrogate pair", Qjson_error);
+ define_error (Qjson_number_out_of_range,
+ "number out of range", Qjson_error);
+ define_error (Qjson_escape_sequence_error,
+ "invalid escape sequence", Qjson_parse_error);
DEFSYM (Qpure, "pure");
DEFSYM (Qside_effect_free, "side-effect-free");
--
2.42.0
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-23 10:40 ` Herman, Géza
@ 2024-03-23 11:11 ` Eli Zaretskii
2024-03-30 7:43 ` Eli Zaretskii
1 sibling, 0 replies; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-23 11:11 UTC (permalink / raw)
To: Géza Herman; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>,
> emacs-devel@gnu.org
> Date: Sat, 23 Mar 2024 11:40:41 +0100
>
> Eli Zaretskii <eliz@gnu.org> writes:
>
> >> If we want to have conditional compilation, is there a simple
> >> similar conditional compilation case already in Emacs? So I can
> >> copy the approach easily (supposedly I need to add a flag to
> >> configure, it needs to add a -DUSE_OWN_JSON_PARSER switch to the
> >> compiller, etc.)
> >
> > Let's wait with this until we have a clear idea whether we need
> > this. My suggestion was just to present a possibility, not
> > necessarily a decision that we should have this. I'm not sure yet
> > whether we need to offer the possibility of using libjansson for
> > parsing JSON as an option.
>
> What is the process for deciding this?
We install the new parser, and wait for complaints about it that could
justify letting people optionally use libjansson. Like performance
problems that only happen sometimes and cannot be fixed in the new
parser, or some other similar issues. If and when such complaints
come up, we consider the possibility of leaving libjansson-based
parser in our sources.
> I attached the latest version of the parser. I reverted the Lisp
> vector based workspace to use a malloc based one, and added a note
> regarding that the GC most not run during parsing. I also
> implemented some additional optimizations, this version runs ~10x
> faster on my LSP dataset. I also run the parser on all the JSON
> files I found on my computer (~60,000 files, 1.5 GiB), no problems
> were found. The parser runs ~6.5x faster (3.5 sec vs 23.4 sec) on
> this dataset.
Thanks. I'll wait for a few days before installing this, in case
there are objections or further comments.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-23 10:40 ` Herman, Géza
2024-03-23 11:11 ` Eli Zaretskii
@ 2024-03-30 7:43 ` Eli Zaretskii
2024-03-30 10:50 ` Herman, Géza
1 sibling, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-30 7:43 UTC (permalink / raw)
To: Géza Herman; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>,
> emacs-devel@gnu.org
> Date: Sat, 23 Mar 2024 11:40:41 +0100
>
> I attached the latest version of the parser. I reverted the Lisp
> vector based workspace to use a malloc based one, and added a note
> that the GC must not run during parsing. I also
> implemented some additional optimizations; this version runs ~10x
> faster on my LSP dataset. I also ran the parser on all the JSON
> files I found on my computer (~60,000 files, 1.5 GiB); no problems
> were found. The parser runs ~6.5x faster (3.5 sec vs 23.4 sec) on
> this dataset.
Thanks. I installed this on the master branch, after adding the
required commit log messages and some cleanup of unused functions.
However:
. 3 tests in test/src/json-tests.el are now failing, where they
succeeded before; see the log at the end
. the times of the relevant tests don't seem to be faster than
the libjansson version, perhaps because this is an unoptimized
build
Here are the failed tests; please see how to fix them:
Test json-parse-string/invalid-unicode backtrace:
signal(ert-test-failed (((should-error (json-parse-string "[\"\200\"
ert-fail(((should-error (json-parse-string "[\"\200\"]") :type 'json
ert--should-error-handle-error((closure ((form-description-404 (shou
(condition-case -condition- (unwind-protect (setq value-402 (apply f
(let ((errorp405 nil) (form-description-fn-406 #'(lambda nil form-de
(let (form-description-404) (let ((errorp405 nil) (form-description-
(let ((value-402 'ert-form-evaluation-aborted-403)) (let (form-descr
(let* ((fn-400 #'json-parse-string) (args-401 (condition-case err (l
(closure (t) nil (let* ((fn-395 #'fboundp) (args-396 (condition-case
#f(compiled-function () #<bytecode 0x1581ad7c20e41085>)()
handler-bind-1(#f(compiled-function () #<bytecode 0x1581ad7c20e41085
ert--run-test-internal(#s(ert--test-execution-info :test #s(ert-test
ert-run-test(#s(ert-test :name json-parse-string/invalid-unicode :do
ert-run-or-rerun-test(#s(ert--stats :selector ... :tests ... :test-m
ert-run-tests((not (or (tag :unstable) (tag :nativecomp))) #f(compil
ert-run-tests-batch((not (or (tag :unstable) (tag :nativecomp))))
ert-run-tests-batch-and-exit((not (or (tag :unstable) (tag :nativeco
eval((ert-run-tests-batch-and-exit '(not (or (tag :unstable) (tag :n
command-line-1(("-L" ";." "-l" "ert" "--eval" "(setq treesit-extra-l
command-line()
normal-top-level()
Test json-parse-string/invalid-unicode condition:
(ert-test-failed
((should-error (json-parse-string "[\"\200\"]") :type
'json-parse-error)
:form (json-parse-string "[\"\200\"]") :condition
(json-utf8-decode-error 1 3 3) :fail-reason
"the error signaled did not have the expected type"))
FAILED 7/21 json-parse-string/invalid-unicode (0.000198 sec) at src/json-tests.el:196
Test json-parse-string/null backtrace:
signal(ert-test-failed (((should-error (json-parse-string "\0") :typ
ert-fail(((should-error (json-parse-string "\0") :type 'wrong-type-a
ert--should-error-handle-error((closure ((form-description-377 (shou
(condition-case -condition- (unwind-protect (setq value-375 (apply f
(let ((errorp378 nil) (form-description-fn-379 #'(lambda nil form-de
(let (form-description-377) (let ((errorp378 nil) (form-description-
(let ((value-375 'ert-form-evaluation-aborted-376)) (let (form-descr
(let* ((fn-373 #'json-parse-string) (args-374 (condition-case err (l
(closure (t) nil (let* ((fn-368 #'fboundp) (args-369 (condition-case
#f(compiled-function () #<bytecode 0x1581ad7c20e41085>)()
handler-bind-1(#f(compiled-function () #<bytecode 0x1581ad7c20e41085
ert--run-test-internal(#s(ert--test-execution-info :test #s(ert-test
ert-run-test(#s(ert-test :name json-parse-string/null :documentation
ert-run-or-rerun-test(#s(ert--stats :selector ... :tests ... :test-m
ert-run-tests((not (or (tag :unstable) (tag :nativecomp))) #f(compil
ert-run-tests-batch((not (or (tag :unstable) (tag :nativecomp))))
ert-run-tests-batch-and-exit((not (or (tag :unstable) (tag :nativeco
eval((ert-run-tests-batch-and-exit '(not (or (tag :unstable) (tag :n
command-line-1(("-L" ";." "-l" "ert" "--eval" "(setq treesit-extra-l
command-line()
normal-top-level()
Test json-parse-string/null condition:
(ert-test-failed
((should-error (json-parse-string "\0") :type 'wrong-type-argument)
:form (json-parse-string "\0") :condition (json-end-of-file 1 1 1)
:fail-reason "the error signaled did not have the expected type"))
FAILED 8/21 json-parse-string/null (0.000281 sec) at src/json-tests.el:187
Test json-parse-string/object backtrace:
signal(ert-test-failed (((should (equal (json-parse-string input :ob
ert-fail(((should (equal (json-parse-string input :object-type 'alis
(if (unwind-protect (setq value-231 (apply fn-229 args-230)) (setq f
(let (form-description-233) (if (unwind-protect (setq value-231 (app
(let ((value-231 'ert-form-evaluation-aborted-232)) (let (form-descr
(let* ((fn-229 #'equal) (args-230 (condition-case err (list (json-pa
(let ((input "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9
(closure (t) nil (let* ((fn-209 #'fboundp) (args-210 (condition-case
#f(compiled-function () #<bytecode 0x1581ad7c20e41085>)()
handler-bind-1(#f(compiled-function () #<bytecode 0x1581ad7c20e41085
ert--run-test-internal(#s(ert--test-execution-info :test #s(ert-test
ert-run-test(#s(ert-test :name json-parse-string/object :documentati
ert-run-or-rerun-test(#s(ert--stats :selector ... :tests ... :test-m
ert-run-tests((not (or (tag :unstable) (tag :nativecomp))) #f(compil
ert-run-tests-batch((not (or (tag :unstable) (tag :nativecomp))))
ert-run-tests-batch-and-exit((not (or (tag :unstable) (tag :nativeco
eval((ert-run-tests-batch-and-exit '(not (or (tag :unstable) (tag :n
command-line-1(("-L" ";." "-l" "ert" "--eval" "(setq treesit-extra-l
command-line()
normal-top-level()
Test json-parse-string/object condition:
(ert-test-failed
((should
(equal (json-parse-string input :object-type ...) '(... ...)))
:form
(equal ((abc . [1 2 t]) (def . :null) (abc . [9 :false]))
((abc . [9 :false]) (def . :null)))
:value nil :explanation
(proper-lists-of-different-length 3 2
((abc . [1 2 t]) (def . :null)
(abc . [9 :false]))
((abc . [9 :false]) (def . :null))
first-mismatch-at 0)))
FAILED 9/21 json-parse-string/object (0.000276 sec) at src/json-tests.el:135
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 7:43 ` Eli Zaretskii
@ 2024-03-30 10:50 ` Herman, Géza
2024-03-30 13:00 ` Eli Zaretskii
0 siblings, 1 reply; 21+ messages in thread
From: Herman, Géza @ 2024-03-30 10:50 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, emacs-devel
Eli Zaretskii <eliz@gnu.org> writes:
> Thanks. I installed this on the master branch, after adding the
> required commit log messages and some cleanup of unused
> functions.
Thanks!
> However:
>
> . 3 tests in test/src/json-tests.el are now failing, where they
>   succeeded before; see the log at the end
> . the times of the relevant tests don't seem to be faster than
>   the libjansson version, perhaps because this is an unoptimized
>   build
3 test failures:
1. Handling of utf-8 decode errors: the new parser emits
json-utf8-decode-error instead of json-parse-error (this is what
the test expects). I can fix this by modifying the test.
2. Handling of a single \0 byte: the new parser emits
json-end-of-file. I think this is not the best error kind for
this case, so I'll modify the parser to emit json-parse-error
instead. This is still different from what the test expects
(wrong-type-argument), but I think there is no reason to treat
zero bytes specially. Considering the JSON spec, it's the same
error as any other unexpected byte value.
3. Handling objects with duplicate keys. That's an interesting
one. With alist/plist objects, the old parser removed duplicate
members, but the new parser doesn't remove such members; it keeps
them all. The JSON spec doesn't really say anything about this
case, so I think we're free to do anything we like. Mattias
Engdegård had an interesting idea: what if we put alist/plist
members in reversed order? This way, if one uses assq/plist-get to
get values by keys, the behavior will be consistent with the hash
table representation (which keeps that last value of duplicate
keys). I like the idea of consistency, but I don't like that the
elements will become reversed after parsing. I had the idea that
if the hash table kept the first value of duplicate keys, then
we'd also have consistency. What do you think?
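
For illustration, a rough sketch of the difference being discussed
(untested, and with a made-up input that has a duplicate key):

  (json-parse-string "{\"a\": 1, \"a\": 2}" :object-type 'alist)
  ;; new parser: ((a . 1) (a . 2)) -- all members kept, in input order
  (json-parse-string "{\"a\": 1, \"a\": 2}" :object-type 'hash-table)
  ;; the hash table keeps a single value per key, currently the last (2)

With the alist, assq/alist-get return the first value; with the hash
table, gethash returns the last one, hence the consistency question.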
Regarding performance: the new parser only becomes significantly
faster on larger JSONs. And yes, an unoptimized build also has
an impact on performance.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 10:50 ` Herman, Géza
@ 2024-03-30 13:00 ` Eli Zaretskii
2024-03-30 18:36 ` Herman, Géza
0 siblings, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-30 13:00 UTC (permalink / raw)
To: Géza Herman, Mattias Engdegård; +Cc: emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>,
> emacs-devel@gnu.org
> Date: Sat, 30 Mar 2024 11:50:19 +0100
>
>
> > . 3 tests in test/src/json-tests.el are now failing, where they
> >   succeeded before; see the log at the end
> > . the times of the relevant tests don't seem to be faster than
> >   the libjansson version, perhaps because this is an unoptimized
> >   build
>
> 3 test failures:
> 1. Handling of utf-8 decode errors: the new parser emits
> json-utf8-decode-error instead of json-parse-error (this is what
> the test expects). I can fix this by modifying the test
OK, but we will need to mention this in NEWS as an incompatible
change.
> 2. Handling of a single \0 byte: the new parser emits
> json-end-of-file. I think this is not the best error kind for
> this case, so I'll modify the parser to emit json-parse-error
> instead. This is still different from what the test expects
> (wrong-type-argument), but I think there is no reason to treat
> zero bytes specially. Considering the JSON spec, it's the same
> error as any other unexpected byte value.
Does JSON allow null bytes in its strings? If not, why
wrong-type-argument is not TRT?
> 3. Handling objects with duplicate keys. That's an interesting
> one. With alist/plist objects, the old parser removed duplicate
> members, but the new parser doesn't remove such members, it keeps
> them all. The JSON spec doesn't really say anything about this
> case, so I think we're free to do anything we like. Mattias
> Engdegård had an interesting idea: what if we put alist/plist
> members in reversed order? This way, if one uses assq/plist-get to
> get values by keys, the behavior will be consistent with the hash
> table representation (which keeps that last value of duplicate
> keys). I like the idea of consistency, but I don't like that the
> elements will become reversed after parsing. I had the idea that
> if the hash table kept the first value of duplicate keys, then
> we'd also have consistency. What do you think?
I think we should modify the expected results of the test to match the
new behavior, and leave the order as it is now.
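
Concretely, based on the failure output above, the alist expectation in
that test would presumably change to something like this (a sketch only;
`input' is the test's existing JSON string with the repeated "abc" key):

  (should (equal (json-parse-string input :object-type 'alist)
                 '((abc . [1 2 t]) (def . :null) (abc . [9 :false]))))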
But please also compare with what the Lisp implementation does in
these cases, as that could give us further ideas or make us
reconsider.
Mattias, any comments or suggestions?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 13:00 ` Eli Zaretskii
@ 2024-03-30 18:36 ` Herman, Géza
2024-03-30 20:18 ` [External] : " Drew Adams
2024-03-31 5:08 ` Eli Zaretskii
0 siblings, 2 replies; 21+ messages in thread
From: Herman, Géza @ 2024-03-30 18:36 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, Mattias Engdegård, emacs-devel
Eli Zaretskii <eliz@gnu.org> writes:
>> From: Herman, Géza <geza.herman@gmail.com>
>> 3 test failures:
>> 1. Handling of utf-8 decode errors: the new parser emits
>> json-utf8-decode-error instead of json-parse-error (this is what
>> the test expects). I can fix this by modifying the test
>
> OK, but we will need to mention this in NEWS as an incompatible
> change.
Yes. I'm just mentioning this as an alternative solution:
originally the parser emitted json-parse-error for this, it was
changed during the review. So if we prefer maintaining
compatibility, it's easy to revert this change.
>> 2. Handling of a single \0 byte
>
> Does JSON allow null bytes in its strings? If not, why
> wrong-type-argument is not TRT?
That's correct, null bytes are not allowed (anywhere, not just in
strings). But my point is that the old parser made a special
distinction here. It is not just null bytes that are not allowed
in JSON, but for example, \x01 isn't allowed either. But, for
null bytes, the old parser gives a different error message than
for \x01 bytes. But from the JSON spec perspective, both \x00 and
\x01 are forbidden in the same way. I don't know why null bytes
are handled specially in this regard, so I didn't follow this
behavior in my parser. Maybe this special error case was added
because libjansson couldn't parse strings with null bytes back
then (because the API only accepted zero-terminated strings)?
To me, wrong-type-argument means that the input argument to the
parser is incorrect. Like it's not a string, but an integer. But
here, the parser gets a string, it's just that the string has null
bytes in it somewhere. The type of the argument to json-parse-*
is fine, it's the value which has the problem. So the parser
should give some kind of json-error in my opinion, not
wrong-type-argument. But, of course, if we consider
strings-with-null and strings-without-null as two different types,
then the wrong-type-argument error makes sense (though I don't
know why we'd want to do this).
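
A small sketch of the distinction I have in mind (the first form's
behavior is certain; the error symbol of the second is whatever we
settle on):

  (json-parse-string 42)    ; wrong-type-argument: the argument isn't a string
  (json-parse-string "\0")  ; a string argument with an invalid byte in its
                            ; value, so some json-* error seems more fitting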
>> 3. Handling objects with duplicate keys.
>
> I think we should modify the expected results of the test to
> match the
> new behavior, and leave the order as it is now.
OK.
> But please also compare with what the Lisp implementation does
> in
> these cases, as that could give us further ideas or make us
> reconsider.
I checked json-read, and it seems that it has the exact same
behavior that my parser has. I thought that json-read can only
produce one format, but it turned out it has json-object-type and
json-array-type variables, so it can produce the same variety of
output that the C-based parsers can do. I think that the doc of
json-read should mention this fact. Anyways, the doc says:
(defvar json-object-type 'alist
  "Type to convert JSON objects to.
Must be one of `alist', `plist', or `hash-table'.  Consider let-binding
this around your call to `json-read' instead of `setq'ing it.  Ordering
is maintained for `alist' and `plist', but not for `hash-table'.")
I played with this a little bit, and it works as described (for
hash tables, it keeps the last key-value pair).
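
For example, something along these lines (a quick sketch of what I
tried, with a throwaway input):

  (require 'json)
  (let ((json-object-type 'alist))
    (json-read-from-string "{\"a\": 1, \"a\": 2}"))
  ;; => ((a . 1) (a . 2)) -- duplicates kept, order maintained
  (let ((json-object-type 'hash-table))
    (json-read-from-string "{\"a\": 1, \"a\": 2}"))
  ;; => a hash table where "a" maps to 2 (the last value)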
I think this behavior is important, because this is used when
pretty-formatting JSON. Pretty formatting shouldn't remove
duplicate entries, nor change the ordering of members. Because
the new parser also behaves like this, it can be used to speed up
pretty formatting as well (yeah, I know, half of it, as there is
no new to-JSON serializer implemented yet).
^ permalink raw reply [flat|nested] 21+ messages in thread
* RE: [External] : Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 18:36 ` Herman, Géza
@ 2024-03-30 20:18 ` Drew Adams
2024-03-30 20:51 ` Herman, Géza
2024-03-31 5:08 ` Eli Zaretskii
1 sibling, 1 reply; 21+ messages in thread
From: Drew Adams @ 2024-03-30 20:18 UTC (permalink / raw)
To: Herman, Géza, Eli Zaretskii
Cc: Mattias Engdegård, emacs-devel@gnu.org
> >> 2. Handling of a single \0 byte
> >
> > Does JSON allow null bytes in its strings?
Strict JSON doesn't allow null characters (U+0000).
But does this parser only support strict JSON?
Much JSON in the wild uses various forms of lax
syntax. There, null chars are often allowed.
> >> 3. Handling objects with duplicate keys.
The JSON standard _recommends_ that a JSON object
not have duplicate field names. But it doesn't
prohibit this; an object with duplicate names is still well-formed.
This is another case where feral JSON sometimes
departs from recommended syntax.
It's good if a JSON parser can use _either_ strict
or some version of a lax JSON syntax, au choix.
Lax-syntax support can be important for practical
(real world) reasons.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [External] : Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 20:18 ` [External] : " Drew Adams
@ 2024-03-30 20:51 ` Herman, Géza
2024-03-30 23:28 ` Drew Adams
0 siblings, 1 reply; 21+ messages in thread
From: Herman, Géza @ 2024-03-30 20:51 UTC (permalink / raw)
To: Drew Adams
Cc: Herman, Géza, Eli Zaretskii, Mattias Engdegård,
emacs-devel@gnu.org
Drew Adams <drew.adams@oracle.com> writes:
>> >> 2. Handling of a single \0 byte
>> >
>> > Does JSON allow null bytes in its strings?
>
> Strict JSON doesn't allow null characters (U+0000).
> But does this parser only support strict JSON?
This parser only accepts JSON that conforms to the spec (in
theory). But this is not different from the libjansson based
parser in this regard. That also doesn't allow null characters in
JSON.
Just to be sure that we're on the same page: this means that the
parser doesn't support actual null bytes in the stream. But if a
user wants to put a U+0000 character in a string, they can do that
by using "\u0000".
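
For example (a sketch; the result is described informally):

  (json-parse-string "[\"\\u0000\"]")
  ;; accepted: a one-element vector whose element is a one-character
  ;; string containing U+0000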
^ permalink raw reply [flat|nested] 21+ messages in thread
* RE: [External] : Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 20:51 ` Herman, Géza
@ 2024-03-30 23:28 ` Drew Adams
0 siblings, 0 replies; 21+ messages in thread
From: Drew Adams @ 2024-03-30 23:28 UTC (permalink / raw)
To: Herman, Géza
Cc: Eli Zaretskii, Mattias Engdegård, emacs-devel@gnu.org
> Drew Adams writes:
>
> >> >> 2. Handling of a single \0 byte
> >> >
> >> > Does JSON allow null bytes in its strings?
> >
> > Strict JSON doesn't allow null characters (U+0000).
> > But does this parser only support strict JSON?
>
> This parser only accepts JSON that conforms to the spec (in
> theory). But this is not different from the libjansson based
> parser in this regard. That also doesn't allow null characters in
> JSON.
It might be good if in the future there were
_also_ a way to parse lax (real-world) JSON data.
Not that strict JSON isn't real-world. It's just
that there's also lots of real-world JSON data
that's not strict. And yes, there are parsers
that handle it.
> Just to be sure that we're on the same page: this means that the
> parser doesn't support actual null bytes in the stream. But if a
> user wants to put a U+0000 character in a string, they can do that
> by using "\u0000".
Yes, we're on the same page. I meant _unescaped_
NUL (^@, U+0000, \0) chars - I shoulda said that.
Often a lax parser treats all ASCII control chars,
including U+0000, as insignificant whitespace.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-30 18:36 ` Herman, Géza
2024-03-30 20:18 ` [External] : " Drew Adams
@ 2024-03-31 5:08 ` Eli Zaretskii
2024-03-31 14:51 ` Mattias Engdegård
2024-03-31 17:48 ` Adam Porter
1 sibling, 2 replies; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-31 5:08 UTC (permalink / raw)
To: Géza Herman; +Cc: mattiase, emacs-devel
> From: Herman, Géza <geza.herman@gmail.com>
> Cc: Géza Herman <geza.herman@gmail.com>, Mattias
> Engdegård
> <mattiase@acm.org>, emacs-devel@gnu.org
> Date: Sat, 30 Mar 2024 19:36:57 +0100
>
>
> Eli Zaretskii <eliz@gnu.org> writes:
>
> >> From: Herman, Géza <geza.herman@gmail.com>
> >> 3 test failures:
> >> 1. Handling of utf-8 decode errors: the new parser emits
> >> json-utf8-decode-error instead of json-parse-error (this is
> >> what
> >> the test expects). I can fix this by modifying the test
> >
> > OK, but we will need to mention this in NEWS as an incompatible
> > change.
>
> Yes. I'm just mentioning this as an alternative solution:
> originally the parser emitted json-parse-error for this, it was
> changed during the review. So if we prefer maintaining
> compatibility, it's easy to revert this change.
I don't think we should revert it; this error is fine.
> >> 2. Handling of a single \0 byte
> >
> > Does JSON allow null bytes in its strings? If not, why
> > wrong-type-argument is not TRT?
>
> That's correct, null bytes are not allowed (anywhere, not just in
> strings). But my point is that the old parser made a special
> distinction here. It is not just null bytes which is not allowed
> in JSON, but for example, \x01 isn't allowed either. But, for
> null bytes, the old parser gives a different error message than
> for \x01 bytes. But from the JSON spec perspective, both \x00 and
> \x01 are forbidden in the same way. I don't know why null bytes
> are handled specially in this regard, so I didn't follow this
> behavior in my parser. Maybe this special error case was added
> because libjansson couldn't parse strings with null bytes back
> then (because the API only accepted zero-terminated strings)?
libjansson couldn't parse such strings because it uses a C interface,
and C terminates strings at the first null byte.
> To me, wrong-type-argument means that the input argument to the
> parser is incorrect. Like it's not a string, but an integer. But
> here, the parser gets a string, it's just that the string has null
> bytes in it somewhere.
Well, it's "kind-of" wrong argument: a string with embedded null
bytes. But I'm also interested to hear from Mattias on this (and from
anyone else who might have an opinion about the kind of error we
should signal in this case).
> > But please also compare with what the Lisp implementation does in
> > these cases, as that could give us further ideas or make us
> > reconsider.
>
> I checked json-read, and it seems that it has the exact same
> behavior that my parser has. I thought that json-read can only
> produce one format, but it turned out it has json-object-type and
> json-array-type variables, so it can produce the same variety of
> output that the C-based parsers can do. I think that the doc of
> json-read should mention this fact. Anyways, the doc says:
>
> (defvar json-object-type 'alist
> "Type to convert JSON objects to.
> Must be one of `alist', `plist', or `hash-table'. Consider
> let-binding
> this around your call to `json-read' instead of `setq'ing it.
> Ordering
> is maintained for `alist' and `plist', but not for `hash-table'.")
Please suggest the change for the doc string.
> I think this behavior is important, because this is used when
> pretty-formatting JSON. Pretty formatting shouldn't remove
> duplicate entries, nor change the ordering of members. Because
> the new parser also behaves like this, it can be used to speed up
> pretty formatting as well (yeah, I know, half of it, as there is
> no new to-JSON serializer implemented yet).
There is now; it was installed yesterday evening.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-31 5:08 ` Eli Zaretskii
@ 2024-03-31 14:51 ` Mattias Engdegård
2024-03-31 15:44 ` Eli Zaretskii
2024-03-31 17:48 ` Adam Porter
1 sibling, 1 reply; 21+ messages in thread
From: Mattias Engdegård @ 2024-03-31 14:51 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Géza Herman, emacs-devel
31 mars 2024 kl. 07.08 skrev Eli Zaretskii <eliz@gnu.org>:
> Well, it's "kind-of" wrong argument: a string with embedded null
> bytes. But I'm also interested to hear from Mattias on this (and from
> anyone else who might have an opinion about the kind of error we
> should signal in this case).
I don't think users care much about the exact error we produce as long as it's reasonably descriptive. It's only when users might reasonably discriminate between different errors programmatically that it really matters. We certainly shouldn't have a special message for NULs.
I spent a slow morning speeding up the new JSON parser substantially by not doing stupid work, and all UTF-8 decoding errors will now be `json-utf8-decode-error` instead of `json-parse-error`. I think we can live with that.
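
For callers that do want to discriminate programmatically, the new
symbols are ordinary error conditions (per the patch,
`json-utf8-decode-error` derives from `json-error`), so a sketch like
this should work:

  (condition-case err
      (json-parse-string "[\"\200\"]")   ; the invalid UTF-8 input from the test
    (json-utf8-decode-error (message "bad UTF-8: %S" err))
    (json-parse-error (message "parse error: %S" err)))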
The patch, which will be pushed shortly, also fixes a few long-standing bugs, such as
(json-parse-string "{\"é\":1}" :object-type 'plist)
which resulted in a mangled symbol name.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-31 14:51 ` Mattias Engdegård
@ 2024-03-31 15:44 ` Eli Zaretskii
2024-04-01 8:48 ` Mattias Engdegård
0 siblings, 1 reply; 21+ messages in thread
From: Eli Zaretskii @ 2024-03-31 15:44 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: geza.herman, emacs-devel
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sun, 31 Mar 2024 16:51:34 +0200
> Cc: Géza Herman <geza.herman@gmail.com>, emacs-devel@gnu.org
>
> 31 mars 2024 kl. 07.08 skrev Eli Zaretskii <eliz@gnu.org>:
>
> > Well, it's "kind-of" wrong argument: a string with embedded null
> > bytes. But I'm also interested to hear from Mattias on this (and from
> > anyone else who might have an opinion about the kind of error we
> > should signal in this case).
>
> I don't think users care much about the exact error we produce as long as it's reasonably descriptive. It's only when user might reasonably discriminate between different errors programmatically that it really matters. We certainly shouldn't have a special message for NULs.
>
> I spent a slow morning speeding up the new JSON parser substantially by not doing stupid work, and all UTF-8 decoding errors will now be `json-utf8-decode-error` instead of `json-parse-error`. I think we can live with that.
I'm okay with changing the errors we signal, but let's please document
in NEWS all the changes in this area.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-31 5:08 ` Eli Zaretskii
2024-03-31 14:51 ` Mattias Engdegård
@ 2024-03-31 17:48 ` Adam Porter
1 sibling, 0 replies; 21+ messages in thread
From: Adam Porter @ 2024-03-31 17:48 UTC (permalink / raw)
To: eliz; +Cc: emacs-devel, geza.herman, mattiase
Hi Eli,
> Well, it's "kind-of" wrong argument: a string with embedded null
> bytes. But I'm also interested to hear from Mattias on this (and from
> anyone else who might have an opinion about the kind of error we
> should signal in this case).
Well, since you asked: I tend to agree with Géza that a string
containing a null byte is less of a wrong-type-argument and more of a
parsing or decoding error. I think of a wrong-type-argument as, e.g.
nil instead of a string, etc. IOW, an argument that's wrong on its
face, by virtue of its type, rather than any internal characteristics.
--Adam
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-03-31 15:44 ` Eli Zaretskii
@ 2024-04-01 8:48 ` Mattias Engdegård
2024-04-01 16:54 ` Herman, Géza
0 siblings, 1 reply; 21+ messages in thread
From: Mattias Engdegård @ 2024-04-01 8:48 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: geza.herman, emacs-devel
31 mars 2024 kl. 17.44 skrev Eli Zaretskii <eliz@gnu.org>:
> I'm okay with changing the errors we signal, but let's please document
> in NEWS all the changes in this area.
Right. The new code is now in master.
It made for a parsing speed-up of about 2x, often more.
I also fixed the unintuitive `end-of-file` errors from (json-parse-string "\0"). It had nothing to do with NULs, actually.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] Implement fast verisons of json-parse functions
2024-04-01 8:48 ` Mattias Engdegård
@ 2024-04-01 16:54 ` Herman, Géza
0 siblings, 0 replies; 21+ messages in thread
From: Herman, Géza @ 2024-04-01 16:54 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: Eli Zaretskii, geza.herman, emacs-devel
Mattias Engdegård <mattias.engdegard@gmail.com> writes:
> 31 mars 2024 kl. 17.44 skrev Eli Zaretskii <eliz@gnu.org>:
>
>> I'm okay with changing the errors we signal, but let's please
>> document
>> in NEWS all the changes in this area.
>
> Right. The new code is now in master.
> It made for a parsing speed-up of about 2x, often more.
>
> I also fixed the unintuitive `end-of-file` errors from
> (json-parse-string "\0"). It had nothing to do with NULs,
> actually.
Nice, thank you! Are there any problems remaining that may need some
help from me?
^ permalink raw reply [flat|nested] 21+ messages in thread
Thread overview: 21+ messages
2024-03-19 18:23 [PATCH] Implement fast verisons of json-parse functions Herman, Géza
2024-03-19 18:47 ` Eli Zaretskii
2024-03-19 18:50 ` Herman, Géza
2024-03-19 19:28 ` Eli Zaretskii
2024-03-19 19:50 ` Herman, Géza
2024-03-20 3:37 ` Eli Zaretskii
2024-03-23 10:40 ` Herman, Géza
2024-03-23 11:11 ` Eli Zaretskii
2024-03-30 7:43 ` Eli Zaretskii
2024-03-30 10:50 ` Herman, Géza
2024-03-30 13:00 ` Eli Zaretskii
2024-03-30 18:36 ` Herman, Géza
2024-03-30 20:18 ` [External] : " Drew Adams
2024-03-30 20:51 ` Herman, Géza
2024-03-30 23:28 ` Drew Adams
2024-03-31 5:08 ` Eli Zaretskii
2024-03-31 14:51 ` Mattias Engdegård
2024-03-31 15:44 ` Eli Zaretskii
2024-04-01 8:48 ` Mattias Engdegård
2024-04-01 16:54 ` Herman, Géza
2024-03-31 17:48 ` Adam Porter