* bug#70007: [PATCH] native JSON encoder
@ 2024-03-26 15:33 Mattias Engdegård
2024-03-26 16:46 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-26 15:33 UTC (permalink / raw)
To: 70007
[-- Attachment #1: Type: text/plain, Size: 897 bytes --]
If we replace the lisp-to-JSON encoder with native code, we would not need the jansson library for it and it would be faster.
There is ongoing work on a JSON-to-lisp parser, but the author has made it clear that he does not have time to write an encoder, so I spent a morning mashing up the attached patch.
It generally produces the same result as the old code, except:
- The old code incorrectly accepted strings with non-Unicode characters (raw bytes). There is no reason to do this; JSON is UTF-8 only.
- The old code spent a lot of time ensuring that object keys were unique. The new code doesn't: it's a garbage-in garbage-out type of situation.
The new code could do with some optimisation but it's already about twice as fast as the old code, sometimes more.
I'd be very happy if someone could test it with packages that use this interface (json-serialize, json-insert).
[-- Attachment #2: json-serialise.diff --]
[-- Type: application/octet-stream, Size: 27596 bytes --]
diff --git a/src/json.c b/src/json.c
index e849ccaf722..7fbe700e07c 100644
--- a/src/json.c
+++ b/src/json.c
@@ -23,6 +23,7 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <math.h>
#include <jansson.h>
@@ -231,12 +232,6 @@ json_encode (Lisp_Object string)
return encode_string_utf_8 (string, Qnil, false, Qt, Qt);
}
-static AVOID
-json_out_of_memory (void)
-{
- xsignal0 (Qjson_out_of_memory);
-}
-
/* Signal a Lisp error corresponding to the JSON ERROR. */
static AVOID
@@ -289,26 +284,6 @@ check_string_without_embedded_nulls (Lisp_Object object)
Qstring_without_embedded_nulls_p, object);
}
-/* Signal an error of type `json-out-of-memory' if OBJECT is
- NULL. */
-
-static json_t *
-json_check (json_t *object)
-{
- if (object == NULL)
- json_out_of_memory ();
- return object;
-}
-
-/* If STRING is not a valid UTF-8 string, signal an error of type
- `wrong-type-argument'. STRING must be a unibyte string. */
-
-static void
-json_check_utf8 (Lisp_Object string)
-{
- CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string);
-}
-
enum json_object_type {
json_object_hashtable,
json_object_alist,
@@ -327,179 +302,6 @@ json_check_utf8 (Lisp_Object string)
Lisp_Object false_object;
};
-static json_t *lisp_to_json (Lisp_Object,
- const struct json_configuration *conf);
-
-/* Convert a Lisp object to a nonscalar JSON object (array or object). */
-
-static json_t *
-lisp_to_json_nonscalar_1 (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- json_t *json;
- specpdl_ref count;
-
- if (VECTORP (lisp))
- {
- ptrdiff_t size = ASIZE (lisp);
- json = json_check (json_array ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- for (ptrdiff_t i = 0; i < size; ++i)
- {
- int status
- = json_array_append_new (json, lisp_to_json (AREF (lisp, i),
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- eassert (json_array_size (json) == size);
- }
- else if (HASH_TABLE_P (lisp))
- {
- struct Lisp_Hash_Table *h = XHASH_TABLE (lisp);
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- DOHASH (h, key, v)
- {
- CHECK_STRING (key);
- Lisp_Object ekey = json_encode (key);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (ekey);
- const char *key_str = SSDATA (ekey);
- /* Reject duplicate keys. These are possible if the hash
- table test is not `equal'. */
- if (json_object_get (json, key_str) != NULL)
- wrong_type_argument (Qjson_value_p, lisp);
- int status
- = json_object_set_new (json, key_str,
- lisp_to_json (v, conf));
- if (status == -1)
- {
- /* A failure can be caused either by an invalid key or
- by low memory. */
- json_check_utf8 (ekey);
- json_out_of_memory ();
- }
- }
- }
- else if (NILP (lisp))
- return json_check (json_object ());
- else if (CONSP (lisp))
- {
- Lisp_Object tail = lisp;
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- bool is_plist = !CONSP (XCAR (tail));
- FOR_EACH_TAIL (tail)
- {
- const char *key_str;
- Lisp_Object value;
- Lisp_Object key_symbol;
- if (is_plist)
- {
- key_symbol = XCAR (tail);
- tail = XCDR (tail);
- CHECK_CONS (tail);
- value = XCAR (tail);
- }
- else
- {
- Lisp_Object pair = XCAR (tail);
- CHECK_CONS (pair);
- key_symbol = XCAR (pair);
- value = XCDR (pair);
- }
- CHECK_SYMBOL (key_symbol);
- Lisp_Object key = SYMBOL_NAME (key_symbol);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (key);
- key_str = SSDATA (key);
- /* In plists, ensure leading ":" in keys is stripped. It
- will be reconstructed later in `json_to_lisp'.*/
- if (is_plist && ':' == key_str[0] && key_str[1])
- {
- key_str = &key_str[1];
- }
- /* Only add element if key is not already present. */
- if (json_object_get (json, key_str) == NULL)
- {
- int status
- = json_object_set_new (json, key_str, lisp_to_json (value,
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- }
- CHECK_LIST_END (tail, lisp);
- }
- else
- wrong_type_argument (Qjson_value_p, lisp);
-
- clear_unwind_protect (count);
- unbind_to (count, Qnil);
- return json;
-}
-
-/* Convert LISP to a nonscalar JSON object (array or object). Signal
- an error of type `wrong-type-argument' if LISP is not a vector,
- hashtable, alist, or plist. */
-
-static json_t *
-lisp_to_json_nonscalar (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- json_t *json = lisp_to_json_nonscalar_1 (lisp, conf);
- --lisp_eval_depth;
- return json;
-}
-
-/* Convert LISP to any JSON object. Signal an error of type
- `wrong-type-argument' if the type of LISP can't be converted to a
- JSON object. */
-
-static json_t *
-lisp_to_json (Lisp_Object lisp, const struct json_configuration *conf)
-{
- if (EQ (lisp, conf->null_object))
- return json_check (json_null ());
- else if (EQ (lisp, conf->false_object))
- return json_check (json_false ());
- else if (EQ (lisp, Qt))
- return json_check (json_true ());
- else if (INTEGERP (lisp))
- {
- intmax_t low = TYPE_MINIMUM (json_int_t);
- intmax_t high = TYPE_MAXIMUM (json_int_t);
- intmax_t value = check_integer_range (lisp, low, high);
- return json_check (json_integer (value));
- }
- else if (FLOATP (lisp))
- return json_check (json_real (XFLOAT_DATA (lisp)));
- else if (STRINGP (lisp))
- {
- Lisp_Object encoded = json_encode (lisp);
- json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
- if (json == NULL)
- {
- /* A failure can be caused either by an invalid string or by
- low memory. */
- json_check_utf8 (encoded);
- json_out_of_memory ();
- }
- return json;
- }
-
- /* LISP now must be a vector, hashtable, alist, or plist. */
- return lisp_to_json_nonscalar (lisp, conf);
-}
-
static void
json_parse_args (ptrdiff_t nargs,
Lisp_Object *args,
@@ -585,6 +387,332 @@ DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL,
return json_available_p () ? Qt : Qnil;
}
+/* JSON encoding context */
+typedef struct {
+ char *buf;
+ ptrdiff_t size; /* number of bytes in buf */
+ ptrdiff_t capacity; /* allocated size of buf */
+ ptrdiff_t chars_delta; /* size - {number of Unicode chars in buf} */
+
+ int maxdepth;
+ struct json_configuration conf;
+} json_out_t;
+
+static NO_INLINE void
+json_out_grow (json_out_t *jo, ptrdiff_t bytes)
+{
+ ptrdiff_t need = jo->size + bytes;
+ ptrdiff_t new_size = max (need, 512);
+ while (new_size < need)
+ new_size <<= 1;
+ jo->buf = xrealloc (jo->buf, new_size);
+ jo->capacity = new_size;
+}
+
+static void
+cleanup_json_out (void *arg)
+{
+ json_out_t *jo = arg;
+ xfree (jo->buf);
+ jo->buf = NULL;
+}
+
+/* Make room for `bytes` more bytes in buffer. */
+static void
+json_make_room (json_out_t *jo, ptrdiff_t bytes)
+{
+ if (bytes > jo->capacity - jo->size)
+ json_out_grow (jo, bytes);
+}
+
+#define JSON_OUT_STR(jo, str) (json_out_str (jo, str, sizeof (str) - 1))
+
+/* Add `bytes` bytes from `str` to the buffer. */
+static void
+json_out_str (json_out_t *jo, const char *str, size_t bytes)
+{
+ json_make_room (jo, bytes);
+ memcpy (jo->buf + jo->size, str, bytes);
+ jo->size += bytes;
+}
+
+static void
+json_out_byte (json_out_t *jo, unsigned char c)
+{
+ json_make_room (jo, 1);
+ jo->buf[jo->size++] = c;
+}
+
+static void
+json_out_fixnum (json_out_t *jo, EMACS_INT x)
+{
+ char buf[INT_BUFSIZE_BOUND (EMACS_INT)];
+ char *end = buf + sizeof buf;
+ char *p = fixnum_to_string (x, buf, end);
+ json_out_str (jo, p, end - p);
+}
+
+static AVOID
+string_not_unicode (Lisp_Object obj)
+{
+ /* FIXME: for test compatibility, not a very descriptive error */
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static unsigned char json_plain_char[256] = {
+ /* 32 chars/line: 1 for printable ASCII + DEL except " and \, 0 elsewhere */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1f */
+ 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, /* 40-5f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 60-7f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0-bf */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* c0-df */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0-ff */
+};
+
+static void
+json_out_string (json_out_t *jo, Lisp_Object str, int skip)
+{
+ /* FIXME: this code is slow, make faster! */
+
+ static const char hexchar[16] = "0123456789ABCDEF";
+ ptrdiff_t len = SBYTES (str);
+ json_make_room (jo, len + 2);
+ json_out_byte (jo, '"');
+ unsigned char *p = SDATA (str);
+ unsigned char *end = p + len;
+ p += skip;
+ while (p < end)
+ {
+ unsigned char c = *p;
+ if (json_plain_char[c])
+ {
+ json_out_byte (jo, c);
+ p++;
+ }
+ else if (c > 0x7f)
+ {
+ if (STRING_MULTIBYTE (str))
+ {
+ int n;
+ if (c <= 0xc1)
+ string_not_unicode (str);
+ if (c <= 0xdf)
+ n = 2;
+ else if (c <= 0xef)
+ {
+ int v = (((c & 0x0f) << 12)
+ + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f));
+ if (char_surrogate_p (v))
+ string_not_unicode (str);
+ n = 3;
+ }
+ else if (c <= 0xf7)
+ {
+ int v = (((c & 0x07) << 18)
+ + ((p[1] & 0x3f) << 12)
+ + ((p[2] & 0x3f) << 6)
+ + (p[3] & 0x3f));
+ if (v > MAX_UNICODE_CHAR)
+ string_not_unicode (str);
+ n = 4;
+ }
+ else
+ string_not_unicode (str);
+ json_out_str (jo, (const char *)p, n);
+ jo->chars_delta += n - 1;
+ p += n;
+ }
+ else
+ string_not_unicode (str);
+ }
+ else
+ {
+ json_out_byte (jo, '\\');
+ switch (c)
+ {
+ case '"':
+ case '\\': json_out_byte (jo, c); break;
+ case '\b': json_out_byte (jo, 'b'); break;
+ case '\t': json_out_byte (jo, 't'); break;
+ case '\n': json_out_byte (jo, 'n'); break;
+ case '\f': json_out_byte (jo, 'f'); break;
+ case '\r': json_out_byte (jo, 'r'); break;
+ default:
+ {
+ char hex[5] = { 'u', '0', '0',
+ hexchar[c >> 4], hexchar[c & 0xf] };
+ json_out_str (jo, hex, 5);
+ break;
+ }
+ }
+ p++;
+ }
+ }
+ json_out_byte (jo, '"');
+}
+
+static void
+json_out_nest (json_out_t *jo)
+{
+ --jo->maxdepth;
+ if (jo->maxdepth < 0)
+ error ("Maximum JSON serialisation depth exceeded");
+}
+
+static void
+json_out_unnest (json_out_t *jo)
+{
+ ++jo->maxdepth;
+}
+
+static void json_out_something (json_out_t *jo, Lisp_Object obj);
+
+static void
+json_out_object_cons (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ json_out_byte (jo, '{');
+ bool is_alist = CONSP (XCAR (obj));
+ bool first = true;
+ Lisp_Object tail = obj;
+ FOR_EACH_TAIL (tail)
+ {
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+ Lisp_Object key_sym;
+ Lisp_Object value;
+ if (is_alist)
+ {
+ Lisp_Object pair = XCAR (tail);
+ CHECK_CONS (pair);
+ key_sym = XCAR (pair);
+ value = XCDR (pair);
+ }
+ else
+ {
+ key_sym = XCAR (tail);
+ tail = XCDR (tail);
+ CHECK_CONS (tail);
+ value = XCAR (tail);
+ }
+ /* FIXME: do we care about dup keys here? (probably not) */
+ CHECK_SYMBOL (key_sym);
+ Lisp_Object key = SYMBOL_NAME (key_sym);
+ const char *keystr = SSDATA (key);
+ /* Skip leading ':' in plist keys. */
+ int skip = !is_alist && keystr[0] == ':' && keystr[1] ? 1 : 0;
+ json_out_string (jo, key, skip);
+ json_out_byte (jo, ':');
+ json_out_something (jo, value);
+ }
+ CHECK_LIST_END (tail, obj);
+ json_out_byte (jo, '}');
+ json_out_unnest (jo);
+}
+
+static void
+json_out_object_hash (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ json_out_byte (jo, '{');
+ struct Lisp_Hash_Table *h = XHASH_TABLE (obj);
+ bool first = true;
+ DOHASH (h, k, v)
+ {
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+ /* FIXME: do we care about dup keys here? (probably not) */
+ CHECK_STRING (k);
+ json_out_string (jo, k, 0);
+ json_out_byte (jo, ':');
+ json_out_something (jo, v);
+ }
+ json_out_byte (jo, '}');
+ json_out_unnest (jo);
+
+}
+
+static void
+json_out_array (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ json_out_byte (jo, '[');
+ ptrdiff_t n = ASIZE (obj);
+ for (ptrdiff_t i = 0; i < n; i++)
+ {
+ if (i > 0)
+ json_out_byte (jo, ',');
+ json_out_something (jo, AREF (obj, i));
+ }
+ json_out_byte (jo, ']');
+ json_out_unnest (jo);
+}
+
+static void
+json_out_float (json_out_t *jo, Lisp_Object f)
+{
+ double x = XFLOAT_DATA (f);
+ if (isinf (x) || isnan (x))
+ signal_error ("not a finite number", f);
+ json_make_room (jo, FLOAT_TO_STRING_BUFSIZE);
+ int n = float_to_string (jo->buf + jo->size, x);
+ jo->size += n;
+}
+
+static void
+json_out_bignum (json_out_t *jo, Lisp_Object x)
+{
+ int base = 10;
+ ptrdiff_t size = bignum_bufsize (x, base);
+ json_make_room (jo, size);
+ int n = bignum_to_c_string (jo->buf + jo->size, size, x, base);
+ jo->size += n;
+}
+
+static void
+json_out_something (json_out_t *jo, Lisp_Object obj)
+{
+ if (EQ (obj, jo->conf.null_object))
+ JSON_OUT_STR (jo, "null");
+ else if (EQ (obj, jo->conf.false_object))
+ JSON_OUT_STR (jo, "false");
+ else if (EQ (obj, Qt))
+ JSON_OUT_STR (jo, "true");
+ else if (NILP (obj))
+ JSON_OUT_STR (jo, "{}");
+ else if (FIXNUMP (obj))
+ json_out_fixnum (jo, XFIXNUM (obj));
+ else if (STRINGP (obj))
+ json_out_string (jo, obj, 0);
+ else if (CONSP (obj))
+ json_out_object_cons (jo, obj);
+ else if (FLOATP (obj))
+ json_out_float (jo, obj);
+ else if (HASH_TABLE_P (obj))
+ json_out_object_hash (jo, obj);
+ else if (VECTORP (obj))
+ json_out_array (jo, obj);
+ else if (BIGNUMP (obj))
+ json_out_bignum (jo, obj);
+ else
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static Lisp_Object
+json_out_string_result (json_out_t *jo)
+{
+ /* FIXME: should this be a unibyte or multibyte string?
+ Right now we make a multibyte string for test compatibility,
+ but we are really encoding so unibyte would make more sense. */
+ ptrdiff_t nchars = jo->size - jo->chars_delta;
+ return make_multibyte_string (jo->buf, nchars, jo->size);
+}
+
+/* FIXME: update doc string */
DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
NULL,
doc: /* Return the JSON representation of OBJECT as a string.
@@ -614,95 +742,17 @@ DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
usage: (json-serialize OBJECT &rest ARGS) */)
(ptrdiff_t nargs, Lisp_Object *args)
{
- specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t jo = {
+ .maxdepth = 25,
+ .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse}
+ };
+ json_parse_args (nargs - 1, args + 1, &jo.conf, false);
+ Lisp_Object object = args[0];
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
-
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
-
- char *string = json_dumps (json, JSON_COMPACT | JSON_ENCODE_ANY);
- if (string == NULL)
- json_out_of_memory ();
- record_unwind_protect_ptr (json_free, string);
-
- return unbind_to (count, build_string_from_utf8 (string));
-}
-
-struct json_buffer_and_size
-{
- const char *buffer;
- ptrdiff_t size;
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
-};
-
-static Lisp_Object
-json_insert (void *data)
-{
- struct json_buffer_and_size *buffer_and_size = data;
- ptrdiff_t len = buffer_and_size->size;
- ptrdiff_t inserted_bytes = buffer_and_size->inserted_bytes;
- ptrdiff_t gap_size = GAP_SIZE - inserted_bytes;
-
- /* Enlarge the gap if necessary. */
- if (gap_size < len)
- make_gap (len - gap_size);
-
- /* Copy this chunk of data into the gap. */
- memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE + inserted_bytes,
- buffer_and_size->buffer, len);
- buffer_and_size->inserted_bytes += len;
- return Qnil;
-}
-
-static Lisp_Object
-json_handle_nonlocal_exit (enum nonlocal_exit type, Lisp_Object data)
-{
- switch (type)
- {
- case NONLOCAL_EXIT_SIGNAL:
- return data;
- case NONLOCAL_EXIT_THROW:
- return Fcons (Qno_catch, data);
- default:
- eassume (false);
- }
-}
-
-struct json_insert_data
-{
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
- /* nil if json_insert succeeded, otherwise the symbol
- Qcatch_all_memory_full or a cons (ERROR-SYMBOL . ERROR-DATA). */
- Lisp_Object error;
-};
-
-/* Callback for json_dump_callback that inserts a JSON representation
- as a unibyte string into the gap. DATA must point to a structure
- of type json_insert_data. This function may not exit nonlocally.
- It catches all nonlocal exits and stores them in data->error for
- reraising. */
-
-static int
-json_insert_callback (const char *buffer, size_t size, void *data)
-{
- struct json_insert_data *d = data;
- struct json_buffer_and_size buffer_and_size
- = {.buffer = buffer, .size = size, .inserted_bytes = d->inserted_bytes};
- d->error = internal_catch_all (json_insert, &buffer_and_size,
- json_handle_nonlocal_exit);
- d->inserted_bytes = buffer_and_size.inserted_bytes;
- return NILP (d->error) ? 0 : -1;
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (cleanup_json_out, &jo);
+ json_out_something (&jo, object);
+ return unbind_to (count, json_out_string_result (&jo));
}
DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
@@ -714,72 +764,61 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
usage: (json-insert OBJECT &rest ARGS) */)
(ptrdiff_t nargs, Lisp_Object *args)
{
- specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t jo = {
+ .maxdepth = 25,
+ .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse}
+ };
+ json_parse_args (nargs - 1, args + 1, &jo.conf, false);
+ Lisp_Object object = args[0];
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (cleanup_json_out, &jo);
+ json_out_something (&jo, object);
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
+ /* FIXME: Do we really need to do all this work below to insert a string?
+ Is there no function already written? I must be missing something. */
prepare_to_modify_buffer (PT, PT, NULL);
move_gap_both (PT, PT_BYTE);
- struct json_insert_data data;
- data.inserted_bytes = 0;
- /* Could have used json_dumpb, but that became available only in
- Jansson 2.10, whereas we want to support 2.7 and upward. */
- int status = json_dump_callback (json, json_insert_callback, &data,
- JSON_COMPACT | JSON_ENCODE_ANY);
- if (status == -1)
- {
- if (CONSP (data.error))
- xsignal (XCAR (data.error), XCDR (data.error));
- else
- json_out_of_memory ();
- }
+ if (GAP_SIZE < jo.size)
+ make_gap (jo.size - GAP_SIZE);
+ memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE, jo.buf, jo.size);
+
+ /* No need to keep allocation beyond this point. */
+ unbind_to (count, Qnil);
ptrdiff_t inserted = 0;
- ptrdiff_t inserted_bytes = data.inserted_bytes;
- if (inserted_bytes > 0)
+ ptrdiff_t inserted_bytes = jo.size;
+
+ /* If required, decode the stuff we've read into the gap. */
+ struct coding_system coding;
+ /* JSON strings are UTF-8 encoded strings. */
+ setup_coding_system (Qutf_8_unix, &coding);
+ coding.dst_multibyte = !NILP (BVAR (current_buffer,
+ enable_multibyte_characters));
+ if (CODING_MAY_REQUIRE_DECODING (&coding))
{
- /* If required, decode the stuff we've read into the gap. */
- struct coding_system coding;
- /* JSON strings are UTF-8 encoded strings. If for some reason
- the text returned by the Jansson library includes invalid
- byte sequences, they will be represented by raw bytes in the
- buffer text. */
- setup_coding_system (Qutf_8_unix, &coding);
- coding.dst_multibyte =
- !NILP (BVAR (current_buffer, enable_multibyte_characters));
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- /* Now we have all the new bytes at the beginning of the gap,
- but `decode_coding_gap` needs them at the end of the gap, so
- we need to move them. */
- memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
- decode_coding_gap (&coding, inserted_bytes);
- inserted = coding.produced_char;
- }
- else
- {
- /* Make the inserted text part of the buffer, as unibyte text. */
- eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
- insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
-
- /* The target buffer is unibyte, so we don't need to decode. */
- invalidate_buffer_caches (current_buffer,
- PT, PT + inserted_bytes);
- adjust_after_insert (PT, PT_BYTE,
- PT + inserted_bytes,
- PT_BYTE + inserted_bytes,
- inserted_bytes);
- inserted = inserted_bytes;
- }
+ /* Now we have all the new bytes at the beginning of the gap,
+ but `decode_coding_gap` needs them at the end of the gap, so
+ we need to move them. */
+ memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
+ decode_coding_gap (&coding, inserted_bytes);
+ inserted = coding.produced_char;
+ }
+ else
+ {
+ /* Make the inserted text part of the buffer, as unibyte text. */
+ eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
+
+ /* The target buffer is unibyte, so we don't need to decode. */
+ invalidate_buffer_caches (current_buffer,
+ PT, PT + inserted_bytes);
+ adjust_after_insert (PT, PT_BYTE,
+ PT + inserted_bytes,
+ PT_BYTE + inserted_bytes,
+ inserted_bytes);
+ inserted = inserted_bytes;
}
/* Call after-change hooks. */
@@ -791,7 +830,7 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
SET_PT_BOTH (PT + inserted, PT_BYTE + inserted_bytes);
}
- return unbind_to (count, Qnil);
+ return Qnil;
}
/* Convert a JSON object to a Lisp object. */
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index dffc6291ca1..c908c031a82 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -90,8 +90,11 @@ json-serialize/object
"{\"abc\":[1,2,true],\"def\":null}"))
(should (equal (json-serialize nil) "{}"))
(should (equal (json-serialize '((abc))) "{\"abc\":{}}"))
- (should (equal (json-serialize '((a . 1) (b . 2) (a . 3)))
+ (should (equal (json-serialize '((a . 1) (b . 2)))
"{\"a\":1,\"b\":2}"))
+ ;; FIXME: we assume hash table keys to be unique
+ ;; (should (equal (json-serialize '((a . 1) (b . 2) (a . 3)))
+ ;; "{\"a\":1,\"b\":2}"))
(should-error (json-serialize '(abc)) :type 'wrong-type-argument)
(should-error (json-serialize '((a 1))) :type 'wrong-type-argument)
(should-error (json-serialize '((1 . 2))) :type 'wrong-type-argument)
@@ -124,13 +127,14 @@ json-serialize/object
\"detect-plist\":{\"bla\":\"ble\"}\
}")))
-(ert-deftest json-serialize/object-with-duplicate-keys ()
- (skip-unless (fboundp 'json-serialize))
- (let ((table (make-hash-table :test #'eq)))
- (puthash (copy-sequence "abc") [1 2 t] table)
- (puthash (copy-sequence "abc") :null table)
- (should (equal (hash-table-count table) 2))
- (should-error (json-serialize table) :type 'wrong-type-argument)))
+;; FIXME: we don't check for duplicate keys
+;; (ert-deftest json-serialize/object-with-duplicate-keys ()
+;; (skip-unless (fboundp 'json-serialize))
+;; (let ((table (make-hash-table :test #'eq)))
+;; (puthash (copy-sequence "abc") [1 2 t] table)
+;; (puthash (copy-sequence "abc") :null table)
+;; (should (equal (hash-table-count table) 2))
+;; (should-error (json-serialize table) :type 'wrong-type-argument)))
(ert-deftest json-parse-string/object ()
(skip-unless (fboundp 'json-parse-string))
@@ -174,7 +178,10 @@ json-serialize/string
"[\"\\nasdфыв\\u001F\u007ffgh\\t\"]"))
(should (equal (json-serialize ["a\0b"]) "[\"a\\u0000b\"]"))
;; FIXME: Is this the right behavior?
- (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]")))
+ ;; FIXME: (no it's not)
+ ;; (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]"))
+ (should-error (json-serialize ["\u00C4\xC3\x84"]))
+ )
(ert-deftest json-serialize/invalid-unicode ()
(skip-unless (fboundp 'json-serialize))
^ permalink raw reply related [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-26 15:33 bug#70007: [PATCH] native JSON encoder Mattias Engdegård
@ 2024-03-26 16:46 ` Eli Zaretskii
2024-03-27 12:46 ` Mattias Engdegård
2024-03-28 19:16 ` Theodor Thornhill via Bug reports for GNU Emacs, the Swiss army knife of text editors
0 siblings, 2 replies; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-26 16:46 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Tue, 26 Mar 2024 16:33:52 +0100
>
> If we replace the lisp-to-JSON encoder with native code, we would not need the jansson library for it and it would be faster.
>
> There is ongoing work on a JSON-to-lisp parser, but the author has made it clear that he does not have time to write an encoder, so I spent a morning mashing up the attached patch.
Thanks for working on this.
> It generally produces the same result as the old code, except:
>
> - The old code incorrectly accepted strings with non-Unicode characters (raw bytes). There is no reason to do this; JSON is UTF-8 only.
Would it complicate the code not to reject raw bytes? I'd like to
avoid incompatibilities if it's practical. Also, Emacs traditionally
doesn't reject raw bytes, leaving that to the application or the user.
> I'd be very happy if someone could test it with packages that use this interface (json-serialise, json-insert).
Yes, please.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-26 16:46 ` Eli Zaretskii
@ 2024-03-27 12:46 ` Mattias Engdegård
2024-03-27 15:49 ` Mattias Engdegård
2024-03-28 19:16 ` Theodor Thornhill via Bug reports for GNU Emacs, the Swiss army knife of text editors
1 sibling, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-27 12:46 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: 70007
26 mars 2024 kl. 17.46 skrev Eli Zaretskii <eliz@gnu.org>:
>> - The old code incorrectly accepted strings with non-Unicode characters (raw bytes). There is no reason to do this; JSON is UTF-8 only.
>
> Would it complicate the code not to reject raw bytes? I'd like to
> avoid incompatibilities if it's practical. Also, Emacs traditionally
> doesn't reject raw bytes, leaving that to the application or the user.
Actually I may have misrepresented the behaviour of the old encoder. It doesn't accept any raw bytes but only sequences that happen to form valid UTF-8. It's quite strange, and I don't really think this was ever intended, just a consequence of the implementation.
This means that it accepts an already encoded unibyte UTF-8 string:
(json-serialize "\303\251") -> "\"é\""
which is doubly odd since it's supposed to be encoding, but it ends up decoding the characters instead.
Even worse, it accepts mixtures of encoded and decoded chars:
(json-serialize "é\303\251") -> "\"éé\""
which is just bonkers.
So while we could try to replicate this 'interesting' behaviour it would definitely complicate the code and be of questionable use.
The JSON spec is quite clear that it's UTF-8 only. The only useful deviation that I can think of would be to allow unpaired surrogates (WTF-8) to pass through for transmission of Windows file names, but that would be an extension -- the old encoder doesn't permit those.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-27 12:46 ` Mattias Engdegård
@ 2024-03-27 15:49 ` Mattias Engdegård
2024-03-27 17:40 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-27 15:49 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: 70007
[-- Attachment #1: Type: text/plain, Size: 367 bytes --]
Here is an updated patch. It now ignores duplicated keys in objects represented by alists and plists, just like the old encoder. (I didn't include this in the first draft out of fear it would be slow and complicated, but it turned out just to be complicated.)
The performance is still acceptable, which means at least 2x the speed of the Jansson-based encoder.
[-- Attachment #2: json-serialise.diff --]
[-- Type: application/octet-stream, Size: 33508 bytes --]
diff --git a/src/json.c b/src/json.c
index e849ccaf722..b853dec3b21 100644
--- a/src/json.c
+++ b/src/json.c
@@ -23,6 +23,7 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
+#include <math.h>
#include <jansson.h>
@@ -231,12 +232,6 @@ json_encode (Lisp_Object string)
return encode_string_utf_8 (string, Qnil, false, Qt, Qt);
}
-static AVOID
-json_out_of_memory (void)
-{
- xsignal0 (Qjson_out_of_memory);
-}
-
/* Signal a Lisp error corresponding to the JSON ERROR. */
static AVOID
@@ -289,26 +284,6 @@ check_string_without_embedded_nulls (Lisp_Object object)
Qstring_without_embedded_nulls_p, object);
}
-/* Signal an error of type `json-out-of-memory' if OBJECT is
- NULL. */
-
-static json_t *
-json_check (json_t *object)
-{
- if (object == NULL)
- json_out_of_memory ();
- return object;
-}
-
-/* If STRING is not a valid UTF-8 string, signal an error of type
- `wrong-type-argument'. STRING must be a unibyte string. */
-
-static void
-json_check_utf8 (Lisp_Object string)
-{
- CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string);
-}
-
enum json_object_type {
json_object_hashtable,
json_object_alist,
@@ -327,179 +302,6 @@ json_check_utf8 (Lisp_Object string)
Lisp_Object false_object;
};
-static json_t *lisp_to_json (Lisp_Object,
- const struct json_configuration *conf);
-
-/* Convert a Lisp object to a nonscalar JSON object (array or object). */
-
-static json_t *
-lisp_to_json_nonscalar_1 (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- json_t *json;
- specpdl_ref count;
-
- if (VECTORP (lisp))
- {
- ptrdiff_t size = ASIZE (lisp);
- json = json_check (json_array ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- for (ptrdiff_t i = 0; i < size; ++i)
- {
- int status
- = json_array_append_new (json, lisp_to_json (AREF (lisp, i),
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- eassert (json_array_size (json) == size);
- }
- else if (HASH_TABLE_P (lisp))
- {
- struct Lisp_Hash_Table *h = XHASH_TABLE (lisp);
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- DOHASH (h, key, v)
- {
- CHECK_STRING (key);
- Lisp_Object ekey = json_encode (key);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (ekey);
- const char *key_str = SSDATA (ekey);
- /* Reject duplicate keys. These are possible if the hash
- table test is not `equal'. */
- if (json_object_get (json, key_str) != NULL)
- wrong_type_argument (Qjson_value_p, lisp);
- int status
- = json_object_set_new (json, key_str,
- lisp_to_json (v, conf));
- if (status == -1)
- {
- /* A failure can be caused either by an invalid key or
- by low memory. */
- json_check_utf8 (ekey);
- json_out_of_memory ();
- }
- }
- }
- else if (NILP (lisp))
- return json_check (json_object ());
- else if (CONSP (lisp))
- {
- Lisp_Object tail = lisp;
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- bool is_plist = !CONSP (XCAR (tail));
- FOR_EACH_TAIL (tail)
- {
- const char *key_str;
- Lisp_Object value;
- Lisp_Object key_symbol;
- if (is_plist)
- {
- key_symbol = XCAR (tail);
- tail = XCDR (tail);
- CHECK_CONS (tail);
- value = XCAR (tail);
- }
- else
- {
- Lisp_Object pair = XCAR (tail);
- CHECK_CONS (pair);
- key_symbol = XCAR (pair);
- value = XCDR (pair);
- }
- CHECK_SYMBOL (key_symbol);
- Lisp_Object key = SYMBOL_NAME (key_symbol);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (key);
- key_str = SSDATA (key);
- /* In plists, ensure leading ":" in keys is stripped. It
- will be reconstructed later in `json_to_lisp'.*/
- if (is_plist && ':' == key_str[0] && key_str[1])
- {
- key_str = &key_str[1];
- }
- /* Only add element if key is not already present. */
- if (json_object_get (json, key_str) == NULL)
- {
- int status
- = json_object_set_new (json, key_str, lisp_to_json (value,
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- }
- CHECK_LIST_END (tail, lisp);
- }
- else
- wrong_type_argument (Qjson_value_p, lisp);
-
- clear_unwind_protect (count);
- unbind_to (count, Qnil);
- return json;
-}
-
-/* Convert LISP to a nonscalar JSON object (array or object). Signal
- an error of type `wrong-type-argument' if LISP is not a vector,
- hashtable, alist, or plist. */
-
-static json_t *
-lisp_to_json_nonscalar (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- json_t *json = lisp_to_json_nonscalar_1 (lisp, conf);
- --lisp_eval_depth;
- return json;
-}
-
-/* Convert LISP to any JSON object. Signal an error of type
- `wrong-type-argument' if the type of LISP can't be converted to a
- JSON object. */
-
-static json_t *
-lisp_to_json (Lisp_Object lisp, const struct json_configuration *conf)
-{
- if (EQ (lisp, conf->null_object))
- return json_check (json_null ());
- else if (EQ (lisp, conf->false_object))
- return json_check (json_false ());
- else if (EQ (lisp, Qt))
- return json_check (json_true ());
- else if (INTEGERP (lisp))
- {
- intmax_t low = TYPE_MINIMUM (json_int_t);
- intmax_t high = TYPE_MAXIMUM (json_int_t);
- intmax_t value = check_integer_range (lisp, low, high);
- return json_check (json_integer (value));
- }
- else if (FLOATP (lisp))
- return json_check (json_real (XFLOAT_DATA (lisp)));
- else if (STRINGP (lisp))
- {
- Lisp_Object encoded = json_encode (lisp);
- json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
- if (json == NULL)
- {
- /* A failure can be caused either by an invalid string or by
- low memory. */
- json_check_utf8 (encoded);
- json_out_of_memory ();
- }
- return json;
- }
-
- /* LISP now must be a vector, hashtable, alist, or plist. */
- return lisp_to_json_nonscalar (lisp, conf);
-}
-
static void
json_parse_args (ptrdiff_t nargs,
Lisp_Object *args,
@@ -585,124 +387,498 @@ DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL,
return json_available_p () ? Qt : Qnil;
}
-DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
- NULL,
- doc: /* Return the JSON representation of OBJECT as a string.
+/* JSON encoding context */
+typedef struct {
+ char *buf;
+ ptrdiff_t size; /* number of bytes in buf */
+ ptrdiff_t capacity; /* allocated size of buf */
+ ptrdiff_t chars_delta; /* size - {number of Unicode chars in buf} */
+
+ int maxdepth;
+ struct symset_tbl *ss_table; /* table used by containing object */
+ struct json_configuration conf;
+} json_out_t;
+
+/* Set of symbols */
+typedef struct {
+ int count; /* symbols in table */
+ int bits; /* log2(table size) */
+ struct symset_tbl *table; /* heap-allocated table */
+} symset_t;
+
+struct symset_tbl
+{
+ /* Table used by the containing object if any, so that we can easily
+ free all tables if an error occurs. */
+ struct symset_tbl *up;
+ /* Table of symbols (2**bits entries), Qunbound where unused. */
+ Lisp_Object entries[];
+};
-OBJECT must be t, a number, string, vector, hashtable, alist, plist,
-or the Lisp equivalents to the JSON null and false values, and its
-elements must recursively consist of the same kinds of values. t will
-be converted to the JSON true value. Vectors will be converted to
-JSON arrays, whereas hashtables, alists and plists are converted to
-JSON objects. Hashtable keys must be strings without embedded null
-characters and must be unique within each object. Alist and plist
-keys must be symbols; if a key is duplicate, the first instance is
-used.
+static struct symset_tbl *
+alloc_symset_table (int bits)
+{
+ struct symset_tbl *st = xmalloc (sizeof *st + (sizeof *st->entries << bits));
+ int size = 1 << bits;
+ for (ptrdiff_t i = 0; i < size; i++)
+ st->entries[i] = Qunbound;
+ return st;
+}
-The Lisp equivalents to the JSON null and false values are
-configurable in the arguments ARGS, a list of keyword/argument pairs:
+/* Create a new symset to use for a new object. */
+static symset_t
+push_symset (json_out_t *jo)
+{
+ int bits = 4;
+ struct symset_tbl *tbl = alloc_symset_table (bits);
+ tbl->up = jo->ss_table;
+ jo->ss_table = tbl;
+ return (symset_t){ .count = 0, .bits = 4, .table = tbl };
+}
-The keyword argument `:null-object' specifies which object to use
-to represent a JSON null value. It defaults to `:null'.
+/* Destroy the current symset. */
+static void
+pop_symset (json_out_t *jo, symset_t *ss)
+{
+ jo->ss_table = ss->table->up;
+ xfree (ss->table);
+}
-The keyword argument `:false-object' specifies which object to use to
-represent a JSON false value. It defaults to `:false'.
+/* Remove all heap-allocated symset tables, in case an error occurred. */
+static void
+cleanup_symset_tables (struct symset_tbl *st)
+{
+ while (st)
+ {
+ struct symset_tbl *up = st->up;
+ xfree (st);
+ st = up;
+ }
+}
-In you specify the same value for `:null-object' and `:false-object',
-a potentially ambiguous situation, the JSON output will not contain
-any JSON false values.
-usage: (json-serialize OBJECT &rest ARGS) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+static inline uint32_t
+symset_hash (Lisp_Object sym, int bits)
{
- specpdl_ref count = SPECPDL_INDEX ();
+ return knuth_hash (reduce_emacs_uint_to_hash_hash (XHASH (sym)), bits);
+}
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
+/* Enlarge the table used by a symset. */
+static NO_INLINE void
+symset_expand (symset_t *ss)
+{
+ struct symset_tbl *old_table = ss->table;
+ int oldbits = ss->bits;
+ int oldsize = 1 << oldbits;
+ int bits = oldbits + 1;
+ ss->bits = bits;
+ ss->table = alloc_symset_table (bits);
+ ss->table->up = old_table->up;
+ /* Move all entries from the old table to the new one. */
+ int mask = (1 << bits) - 1;
+ struct symset_tbl *tbl = ss->table;
+ for (ptrdiff_t i = 0; i < oldsize; i++)
+ {
+ Lisp_Object sym = old_table->entries[i];
+ if (!BASE_EQ (sym, Qunbound))
+ {
+ ptrdiff_t j = symset_hash (sym, bits);
+ while (!BASE_EQ (tbl->entries[j], Qunbound))
+ j = (j + 1) & mask;
+ tbl->entries[j] = sym;
+ }
+ }
+ xfree (old_table);
+}
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
+/* If sym is in ss, return false; otherwise add it and return true.
+ Comparison is done by strict identity. */
+static inline bool
+symset_add (json_out_t *jo, symset_t *ss, Lisp_Object sym)
+{
+ /* Make sure we don't fill more than half of the table. */
+ if (ss->count * 2 >= (1 << ss->bits))
+ {
+ symset_expand (ss);
+ jo->ss_table = ss->table;
+ }
+
+ struct symset_tbl *tbl = ss->table;
+ int mask = (1 << ss->bits) - 1;
+ for (ptrdiff_t i = symset_hash (sym, ss->bits); ; i = (i + 1) & mask)
+ {
+ Lisp_Object s = tbl->entries[i];
+ if (BASE_EQ (s, sym))
+ return false; /* Previous occurrence found. */
+ if (BASE_EQ (s, Qunbound))
+ {
+ /* Not in set, add it. */
+ tbl->entries[i] = sym;
+ ss->count++;
+ return true;
+ }
+ }
+}
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
+static NO_INLINE void
+json_out_grow (json_out_t *jo, ptrdiff_t bytes)
+{
+ ptrdiff_t need = jo->size + bytes;
+ ptrdiff_t new_size = max (need, 512);
+ while (new_size < need)
+ new_size <<= 1;
+ jo->buf = xrealloc (jo->buf, new_size);
+ jo->capacity = new_size;
+}
- char *string = json_dumps (json, JSON_COMPACT | JSON_ENCODE_ANY);
- if (string == NULL)
- json_out_of_memory ();
- record_unwind_protect_ptr (json_free, string);
+static void
+cleanup_json_out (void *arg)
+{
+ json_out_t *jo = arg;
+ xfree (jo->buf);
+ cleanup_symset_tables (jo->ss_table);
+}
- return unbind_to (count, build_string_from_utf8 (string));
+/* Make room for `bytes` more bytes in buffer. */
+static void
+json_make_room (json_out_t *jo, ptrdiff_t bytes)
+{
+ if (bytes > jo->capacity - jo->size)
+ json_out_grow (jo, bytes);
}
-struct json_buffer_and_size
+#define JSON_OUT_STR(jo, str) (json_out_str (jo, str, sizeof (str) - 1))
+
+/* Add `bytes` bytes from `str` to the buffer. */
+static void
+json_out_str (json_out_t *jo, const char *str, size_t bytes)
{
- const char *buffer;
- ptrdiff_t size;
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
+ json_make_room (jo, bytes);
+ memcpy (jo->buf + jo->size, str, bytes);
+ jo->size += bytes;
+}
+
+static void
+json_out_byte (json_out_t *jo, unsigned char c)
+{
+ json_make_room (jo, 1);
+ jo->buf[jo->size++] = c;
+}
+
+static void
+json_out_fixnum (json_out_t *jo, EMACS_INT x)
+{
+ char buf[INT_BUFSIZE_BOUND (EMACS_INT)];
+ char *end = buf + sizeof buf;
+ char *p = fixnum_to_string (x, buf, end);
+ json_out_str (jo, p, end - p);
+}
+
+static AVOID
+string_not_unicode (Lisp_Object obj)
+{
+ /* FIXME: for test compatibility, not a very descriptive error */
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static unsigned char json_plain_char[256] = {
+ /* 32 chars/line: 1 for printable ASCII + DEL except " and \, 0 elsewhere */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1f */
+ 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, /* 40-5f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 60-7f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0-bf */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* c0-df */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0-ff */
};
-static Lisp_Object
-json_insert (void *data)
+static void
+json_out_string (json_out_t *jo, Lisp_Object str, int skip)
{
- struct json_buffer_and_size *buffer_and_size = data;
- ptrdiff_t len = buffer_and_size->size;
- ptrdiff_t inserted_bytes = buffer_and_size->inserted_bytes;
- ptrdiff_t gap_size = GAP_SIZE - inserted_bytes;
-
- /* Enlarge the gap if necessary. */
- if (gap_size < len)
- make_gap (len - gap_size);
-
- /* Copy this chunk of data into the gap. */
- memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE + inserted_bytes,
- buffer_and_size->buffer, len);
- buffer_and_size->inserted_bytes += len;
- return Qnil;
+ /* FIXME: this code is slow, make faster! */
+
+ static const char hexchar[16] = "0123456789ABCDEF";
+ ptrdiff_t len = SBYTES (str);
+ json_make_room (jo, len + 2);
+ json_out_byte (jo, '"');
+ unsigned char *p = SDATA (str);
+ unsigned char *end = p + len;
+ p += skip;
+ while (p < end)
+ {
+ unsigned char c = *p;
+ if (json_plain_char[c])
+ {
+ json_out_byte (jo, c);
+ p++;
+ }
+ else if (c > 0x7f)
+ {
+ if (STRING_MULTIBYTE (str))
+ {
+ int n;
+ if (c <= 0xc1)
+ string_not_unicode (str);
+ if (c <= 0xdf)
+ n = 2;
+ else if (c <= 0xef)
+ {
+ int v = (((c & 0x0f) << 12)
+ + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f));
+ if (char_surrogate_p (v))
+ string_not_unicode (str);
+ n = 3;
+ }
+ else if (c <= 0xf7)
+ {
+ int v = (((c & 0x07) << 18)
+ + ((p[1] & 0x3f) << 12)
+ + ((p[2] & 0x3f) << 6)
+ + (p[3] & 0x3f));
+ if (v > MAX_UNICODE_CHAR)
+ string_not_unicode (str);
+ n = 4;
+ }
+ else
+ string_not_unicode (str);
+ json_out_str (jo, (const char *)p, n);
+ jo->chars_delta += n - 1;
+ p += n;
+ }
+ else
+ string_not_unicode (str);
+ }
+ else
+ {
+ json_out_byte (jo, '\\');
+ switch (c)
+ {
+ case '"':
+ case '\\': json_out_byte (jo, c); break;
+ case '\b': json_out_byte (jo, 'b'); break;
+ case '\t': json_out_byte (jo, 't'); break;
+ case '\n': json_out_byte (jo, 'n'); break;
+ case '\f': json_out_byte (jo, 'f'); break;
+ case '\r': json_out_byte (jo, 'r'); break;
+ default:
+ {
+ char hex[5] = { 'u', '0', '0',
+ hexchar[c >> 4], hexchar[c & 0xf] };
+ json_out_str (jo, hex, 5);
+ break;
+ }
+ }
+ p++;
+ }
+ }
+ json_out_byte (jo, '"');
}
-static Lisp_Object
-json_handle_nonlocal_exit (enum nonlocal_exit type, Lisp_Object data)
+static void
+json_out_nest (json_out_t *jo)
+{
+ --jo->maxdepth;
+ if (jo->maxdepth < 0)
+ error ("Maximum JSON serialisation depth exceeded");
+}
+
+static void
+json_out_unnest (json_out_t *jo)
{
- switch (type)
+ ++jo->maxdepth;
+}
+
+static void json_out_something (json_out_t *jo, Lisp_Object obj);
+
+static void
+json_out_object_cons (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ symset_t ss = push_symset (jo);
+ json_out_byte (jo, '{');
+ bool is_alist = CONSP (XCAR (obj));
+ bool first = true;
+ Lisp_Object tail = obj;
+ FOR_EACH_TAIL (tail)
{
- case NONLOCAL_EXIT_SIGNAL:
- return data;
- case NONLOCAL_EXIT_THROW:
- return Fcons (Qno_catch, data);
- default:
- eassume (false);
+ Lisp_Object key;
+ Lisp_Object value;
+ if (is_alist)
+ {
+ Lisp_Object pair = XCAR (tail);
+ CHECK_CONS (pair);
+ key = XCAR (pair);
+ value = XCDR (pair);
+ }
+ else
+ {
+ key = XCAR (tail);
+ tail = XCDR (tail);
+ CHECK_CONS (tail);
+ value = XCAR (tail);
+ }
+ key = maybe_remove_pos_from_symbol (key);
+ CHECK_TYPE (BARE_SYMBOL_P (key), Qsymbolp, key);
+
+ if (symset_add (jo, &ss, key))
+ {
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+
+ Lisp_Object key_str = SYMBOL_NAME (key);
+ const char *str = SSDATA (key_str);
+ /* Skip leading ':' in plist keys. */
+ int skip = !is_alist && str[0] == ':' && str[1] ? 1 : 0;
+ json_out_string (jo, key_str, skip);
+ json_out_byte (jo, ':');
+ json_out_something (jo, value);
+ }
+ }
+ CHECK_LIST_END (tail, obj);
+ json_out_byte (jo, '}');
+ pop_symset (jo, &ss);
+ json_out_unnest (jo);
+}
+
+static void
+json_out_object_hash (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ json_out_byte (jo, '{');
+ struct Lisp_Hash_Table *h = XHASH_TABLE (obj);
+ bool first = true;
+ DOHASH (h, k, v)
+ {
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+ /* FIXME: do we care about dup keys here? (probably not) */
+ CHECK_STRING (k);
+ json_out_string (jo, k, 0);
+ json_out_byte (jo, ':');
+ json_out_something (jo, v);
}
+ json_out_byte (jo, '}');
+ json_out_unnest (jo);
+
}
-struct json_insert_data
+static void
+json_out_array (json_out_t *jo, Lisp_Object obj)
{
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
- /* nil if json_insert succeeded, otherwise the symbol
- Qcatch_all_memory_full or a cons (ERROR-SYMBOL . ERROR-DATA). */
- Lisp_Object error;
-};
+ json_out_nest (jo);
+ json_out_byte (jo, '[');
+ ptrdiff_t n = ASIZE (obj);
+ for (ptrdiff_t i = 0; i < n; i++)
+ {
+ if (i > 0)
+ json_out_byte (jo, ',');
+ json_out_something (jo, AREF (obj, i));
+ }
+ json_out_byte (jo, ']');
+ json_out_unnest (jo);
+}
-/* Callback for json_dump_callback that inserts a JSON representation
- as a unibyte string into the gap. DATA must point to a structure
- of type json_insert_data. This function may not exit nonlocally.
- It catches all nonlocal exits and stores them in data->error for
- reraising. */
+static void
+json_out_float (json_out_t *jo, Lisp_Object f)
+{
+ double x = XFLOAT_DATA (f);
+ if (isinf (x) || isnan (x))
+ signal_error ("not a finite number", f);
+ json_make_room (jo, FLOAT_TO_STRING_BUFSIZE);
+ int n = float_to_string (jo->buf + jo->size, x);
+ jo->size += n;
+}
-static int
-json_insert_callback (const char *buffer, size_t size, void *data)
+static void
+json_out_bignum (json_out_t *jo, Lisp_Object x)
{
- struct json_insert_data *d = data;
- struct json_buffer_and_size buffer_and_size
- = {.buffer = buffer, .size = size, .inserted_bytes = d->inserted_bytes};
- d->error = internal_catch_all (json_insert, &buffer_and_size,
- json_handle_nonlocal_exit);
- d->inserted_bytes = buffer_and_size.inserted_bytes;
- return NILP (d->error) ? 0 : -1;
+ int base = 10;
+ ptrdiff_t size = bignum_bufsize (x, base);
+ json_make_room (jo, size);
+ int n = bignum_to_c_string (jo->buf + jo->size, size, x, base);
+ jo->size += n;
+}
+
+static void
+json_out_something (json_out_t *jo, Lisp_Object obj)
+{
+ if (EQ (obj, jo->conf.null_object))
+ JSON_OUT_STR (jo, "null");
+ else if (EQ (obj, jo->conf.false_object))
+ JSON_OUT_STR (jo, "false");
+ else if (EQ (obj, Qt))
+ JSON_OUT_STR (jo, "true");
+ else if (NILP (obj))
+ JSON_OUT_STR (jo, "{}");
+ else if (FIXNUMP (obj))
+ json_out_fixnum (jo, XFIXNUM (obj));
+ else if (STRINGP (obj))
+ json_out_string (jo, obj, 0);
+ else if (CONSP (obj))
+ json_out_object_cons (jo, obj);
+ else if (FLOATP (obj))
+ json_out_float (jo, obj);
+ else if (HASH_TABLE_P (obj))
+ json_out_object_hash (jo, obj);
+ else if (VECTORP (obj))
+ json_out_array (jo, obj);
+ else if (BIGNUMP (obj))
+ json_out_bignum (jo, obj);
+ else
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static Lisp_Object
+json_out_string_result (json_out_t *jo)
+{
+ /* FIXME: should this be a unibyte or multibyte string?
+ Right now we make a multibyte string for test compatibility,
+ but we are really encoding so unibyte would make more sense. */
+ ptrdiff_t nchars = jo->size - jo->chars_delta;
+ return make_multibyte_string (jo->buf, nchars, jo->size);
+}
+
+DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
+ NULL,
+ doc: /* Return the JSON representation of OBJECT as a string.
+
+OBJECT must be t, a number, string, vector, hashtable, alist, plist,
+or the Lisp equivalents to the JSON null and false values, and its
+elements must recursively consist of the same kinds of values. t will
+be converted to the JSON true value. Vectors will be converted to
+JSON arrays, whereas hashtables, alists and plists are converted to
+JSON objects. Hashtable keys must be strings and must be unique within
+each object. Alist and plist keys must be symbols; if a key is duplicate,
+the first instance is used.
+
+The Lisp equivalents to the JSON null and false values are
+configurable in the arguments ARGS, a list of keyword/argument pairs:
+
+The keyword argument `:null-object' specifies which object to use
+to represent a JSON null value. It defaults to `:null'.
+
+The keyword argument `:false-object' specifies which object to use to
+represent a JSON false value. It defaults to `:false'.
+
+If you specify the same value for `:null-object' and `:false-object',
+a potentially ambiguous situation, the JSON output will not contain
+any JSON false values.
+usage: (json-serialize OBJECT &rest ARGS) */)
+ (ptrdiff_t nargs, Lisp_Object *args)
+{
+ json_out_t jo = {
+ .maxdepth = 25,
+ .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse}
+ };
+ json_parse_args (nargs - 1, args + 1, &jo.conf, false);
+ Lisp_Object object = args[0];
+
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (cleanup_json_out, &jo);
+ json_out_something (&jo, object);
+ return unbind_to (count, json_out_string_result (&jo));
}
DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
@@ -714,72 +890,61 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
usage: (json-insert OBJECT &rest ARGS) */)
(ptrdiff_t nargs, Lisp_Object *args)
{
- specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t jo = {
+ .maxdepth = 25,
+ .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse}
+ };
+ json_parse_args (nargs - 1, args + 1, &jo.conf, false);
+ Lisp_Object object = args[0];
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr (cleanup_json_out, &jo);
+ json_out_something (&jo, object);
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
+ /* FIXME: Do we really need to do all this work below to insert a string?
+ Is there no function already written? I must be missing something. */
prepare_to_modify_buffer (PT, PT, NULL);
move_gap_both (PT, PT_BYTE);
- struct json_insert_data data;
- data.inserted_bytes = 0;
- /* Could have used json_dumpb, but that became available only in
- Jansson 2.10, whereas we want to support 2.7 and upward. */
- int status = json_dump_callback (json, json_insert_callback, &data,
- JSON_COMPACT | JSON_ENCODE_ANY);
- if (status == -1)
- {
- if (CONSP (data.error))
- xsignal (XCAR (data.error), XCDR (data.error));
- else
- json_out_of_memory ();
- }
+ if (GAP_SIZE < jo.size)
+ make_gap (jo.size - GAP_SIZE);
+ memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE, jo.buf, jo.size);
+
+ /* No need to keep allocation beyond this point. */
+ unbind_to (count, Qnil);
ptrdiff_t inserted = 0;
- ptrdiff_t inserted_bytes = data.inserted_bytes;
- if (inserted_bytes > 0)
+ ptrdiff_t inserted_bytes = jo.size;
+
+ /* If required, decode the stuff we've read into the gap. */
+ struct coding_system coding;
+ /* JSON strings are UTF-8 encoded strings. */
+ setup_coding_system (Qutf_8_unix, &coding);
+ coding.dst_multibyte = !NILP (BVAR (current_buffer,
+ enable_multibyte_characters));
+ if (CODING_MAY_REQUIRE_DECODING (&coding))
{
- /* If required, decode the stuff we've read into the gap. */
- struct coding_system coding;
- /* JSON strings are UTF-8 encoded strings. If for some reason
- the text returned by the Jansson library includes invalid
- byte sequences, they will be represented by raw bytes in the
- buffer text. */
- setup_coding_system (Qutf_8_unix, &coding);
- coding.dst_multibyte =
- !NILP (BVAR (current_buffer, enable_multibyte_characters));
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- /* Now we have all the new bytes at the beginning of the gap,
- but `decode_coding_gap` needs them at the end of the gap, so
- we need to move them. */
- memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
- decode_coding_gap (&coding, inserted_bytes);
- inserted = coding.produced_char;
- }
- else
- {
- /* Make the inserted text part of the buffer, as unibyte text. */
- eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
- insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
-
- /* The target buffer is unibyte, so we don't need to decode. */
- invalidate_buffer_caches (current_buffer,
- PT, PT + inserted_bytes);
- adjust_after_insert (PT, PT_BYTE,
- PT + inserted_bytes,
- PT_BYTE + inserted_bytes,
- inserted_bytes);
- inserted = inserted_bytes;
- }
+ /* Now we have all the new bytes at the beginning of the gap,
+ but `decode_coding_gap` needs them at the end of the gap, so
+ we need to move them. */
+ memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
+ decode_coding_gap (&coding, inserted_bytes);
+ inserted = coding.produced_char;
+ }
+ else
+ {
+ /* Make the inserted text part of the buffer, as unibyte text. */
+ eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
+
+ /* The target buffer is unibyte, so we don't need to decode. */
+ invalidate_buffer_caches (current_buffer,
+ PT, PT + inserted_bytes);
+ adjust_after_insert (PT, PT_BYTE,
+ PT + inserted_bytes,
+ PT_BYTE + inserted_bytes,
+ inserted_bytes);
+ inserted = inserted_bytes;
}
/* Call after-change hooks. */
@@ -791,7 +956,7 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
SET_PT_BOTH (PT + inserted, PT_BYTE + inserted_bytes);
}
- return unbind_to (count, Qnil);
+ return Qnil;
}
/* Convert a JSON object to a Lisp object. */
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index dffc6291ca1..351d909f05b 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -126,11 +126,38 @@ json-serialize/object
(ert-deftest json-serialize/object-with-duplicate-keys ()
(skip-unless (fboundp 'json-serialize))
- (let ((table (make-hash-table :test #'eq)))
- (puthash (copy-sequence "abc") [1 2 t] table)
- (puthash (copy-sequence "abc") :null table)
- (should (equal (hash-table-count table) 2))
- (should-error (json-serialize table) :type 'wrong-type-argument)))
+
+ (dolist (n '(1 5 20 100))
+ (let ((symbols (mapcar (lambda (i) (make-symbol (format "s%d" i)))
+ (number-sequence 1 n)))
+ (expected (concat "{"
+ (mapconcat (lambda (i) (format "\"s%d\":%d" i i))
+ (number-sequence 1 n) ",")
+ "}")))
+ ;; alist
+ (should (equal (json-serialize
+ (append
+ (cl-mapcar #'cons
+ symbols (number-sequence 1 n))
+ (cl-mapcar #'cons
+ symbols (number-sequence 1001 (+ 1000 n)))))
+ expected))
+ ;; plist
+ (should (equal (json-serialize
+ (append
+ (cl-mapcan #'list
+ symbols (number-sequence 1 n))
+ (cl-mapcan #'list
+ symbols (number-sequence 1001 (+ 1000 n)))))
+ expected))))
+
+ ;; We don't check for duplicated keys in hash tables.
+ ;; (let ((table (make-hash-table :test #'eq)))
+ ;; (puthash (copy-sequence "abc") [1 2 t] table)
+ ;; (puthash (copy-sequence "abc") :null table)
+ ;; (should (equal (hash-table-count table) 2))
+ ;; (should-error (json-serialize table) :type 'wrong-type-argument))
+ )
(ert-deftest json-parse-string/object ()
(skip-unless (fboundp 'json-parse-string))
@@ -174,7 +201,10 @@ json-serialize/string
"[\"\\nasdфыв\\u001F\u007ffgh\\t\"]"))
(should (equal (json-serialize ["a\0b"]) "[\"a\\u0000b\"]"))
;; FIXME: Is this the right behavior?
- (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]")))
+ ;; FIXME: (no it's not)
+ ;; (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]"))
+ (should-error (json-serialize ["\u00C4\xC3\x84"]))
+ )
(ert-deftest json-serialize/invalid-unicode ()
(skip-unless (fboundp 'json-serialize))
^ permalink raw reply related [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-27 15:49 ` Mattias Engdegård
@ 2024-03-27 17:40 ` Eli Zaretskii
2024-03-27 18:57 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-27 17:40 UTC (permalink / raw)
To: Mattias Engdegård, Yuan Fu; +Cc: 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Wed, 27 Mar 2024 16:49:53 +0100
> Cc: 70007@debbugs.gnu.org
>
> Here is an updated patch. It now ignores duplicated keys in objects represented by alists and plists, just like the old encoder. (I didn't include this in the first draft out of fear it would be slow and complicated, but it turned out just to be complicated.)
>
> The performance is still acceptable, which means at least 2x the speed of the Jansson-based encoder.
Thanks. A few initial comments and questions, based on a very cursory
reading.
> +/* JSON encoding context */
This is not our comment style.
> +typedef struct {
> + char *buf;
> + ptrdiff_t size; /* number of bytes in buf */
> + ptrdiff_t capacity; /* allocated size of buf */
> + ptrdiff_t chars_delta; /* size - {number of Unicode chars in buf} */
When you say "Unicode chars", what do you mean? characters or bytes?
If characters, then why do you need to qualify them with "Unicode"?
> +struct symset_tbl
> +{
> + /* Table used by the containing object if any, so that we can easily
> + all tables if an error occurs. */
> + struct symset_tbl *up;
> + /* Table of symbols (2**bits entries), Qunbound where unused. */
> + Lisp_Object entries[];
^^
Is this portable enough?
> +static struct symset_tbl *
> +alloc_symset_table (int bits)
> +{
> + struct symset_tbl *st = xmalloc (sizeof *st + (sizeof *st->entries << bits));
> + int size = 1 << bits;
I'd add an assertion here that BITS is not large enough to produce zero.
> +/* Enlarge the table used by a symset. */
^^
Two spaces there, please.
> +static NO_INLINE void
> +symset_expand (symset_t *ss)
> +{
> + struct symset_tbl *old_table = ss->table;
> + int oldbits = ss->bits;
> + int oldsize = 1 << oldbits;
I'd add an assertion here about the magnitude of BITS.
> + while (p < end)
> + {
> + unsigned char c = *p;
> + if (json_plain_char[c])
> + {
> + json_out_byte (jo, c);
> + p++;
> + }
> + else if (c > 0x7f)
> + {
> + if (STRING_MULTIBYTE (str))
> + {
> + int n;
> + if (c <= 0xc1)
> + string_not_unicode (str);
> + if (c <= 0xdf)
> + n = 2;
> + else if (c <= 0xef)
> + {
> + int v = (((c & 0x0f) << 12)
> + + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f));
> + if (char_surrogate_p (v))
> + string_not_unicode (str);
> + n = 3;
> + }
> + else if (c <= 0xf7)
> + {
> + int v = (((c & 0x07) << 18)
> + + ((p[1] & 0x3f) << 12)
> + + ((p[2] & 0x3f) << 6)
> + + (p[3] & 0x3f));
> + if (v > MAX_UNICODE_CHAR)
> + string_not_unicode (str);
> + n = 4;
> + }
> + else
> + string_not_unicode (str);
> + json_out_str (jo, (const char *)p, n);
> + jo->chars_delta += n - 1;
> + p += n;
> + }
> + else
> + string_not_unicode (str);
This rejects unibyte non-ASCII strings, AFAU, in which case I suggest
to think whether we really want that. E.g., why is it wrong to encode
a string to UTF-8, and then send it to JSON?
> +static void
> +json_out_float (json_out_t *jo, Lisp_Object f)
> +{
> + double x = XFLOAT_DATA (f);
> + if (isinf (x) || isnan (x))
> + signal_error ("not a finite number", f);
Is JSON unable to handle Inf and NaN?
> +static Lisp_Object
> +json_out_string_result (json_out_t *jo)
> +{
> + /* FIXME: should this be a unibyte or multibyte string?
> + Right now we make a multibyte string for test compatibility,
> + but we are really encoding so unibyte would make more sense. */
I indeed think this should be a unibyte string, because otherwise
writing it to a file or a process will/might encode it, which would be
wrong.
> + json_out_t jo = {
> + .maxdepth = 25,
Is this arbitrary, or is it what JSON expects? If arbitrary, should
it be customizable? should it be documented?
> + /* FIXME: Do we really need to do all this work below to insert a string?
> + Is there no function already written? I must be missing something. */
There is no function. All the insert_from_* functions in insdel.c do
something similar.
Btw, shouldn't json-insert call treesit_record_change? Yuan?
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-27 17:40 ` Eli Zaretskii
@ 2024-03-27 18:57 ` Mattias Engdegård
2024-03-27 19:05 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-27 18:57 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Yuan Fu, 70007
Eli, thank you for your comments!
27 mars 2024 kl. 18.40 skrev Eli Zaretskii <eliz@gnu.org>:
>> +/* JSON encoding context */
>
> This is not our comment style.
I'll go through the code and clean up all comments.
>> +typedef struct {
>> + char *buf;
>> + ptrdiff_t size; /* number of bytes in buf */
>> + ptrdiff_t capacity; /* allocated size of buf */
>> + ptrdiff_t chars_delta; /* size - {number of Unicode chars in buf} */
>
> When you say "Unicode chars", what do you mean? characters or bytes?
> If characters, then why do you need to qualify them with "Unicode"?
Characters. Will clarify.
>> + Lisp_Object entries[];
> ^^
> Is this portable enough?
Something I'd like to know, too. We rely on C99 in many other aspects. Are there still compilers that are important to us but don't get this right?
10 years ago this was apparently an issue for IBM XL C 12.1, but modern versions are based on Clang. We could take our chances here; obviously we'll change it if someone complains but it seems unlikely. What do you think?
> I'd add an assertion here that BITS is not large enough to produce zero.
I'll deal with that in some way or another.
> This rejects unibyte non-ASCII strings, AFAU, in which case I suggest
> to think whether we really want that. E.g., why is it wrong to encode
> a string to UTF-8, and then send it to JSON?
The way I see it, that would break the JSON abstraction: it transports strings of Unicode characters, not strings of bytes. A user who for some reason has a string of bytes that encode Unicode characters can just decode it in order to prove it to us. It's not the JSON encoder's job to decode the user's strings.
(It would also be a pain to deal with and risks slowing down the string serialiser even if it's a case that never happens.)
> Is JSON unable to handle Inf and NaN?
That's right.
>> + /* FIXME: should this be a unibyte or multibyte string?
>> + Right now we make a multibyte string for test compatibility,
>> + but we are really encoding so unibyte would make more sense. */
>
> I indeed think this should be a unibyte string, because otherwise
> writing it to a file or a process will/might encode it, which would be
> wrong.
I would prefer that, too, but used multibyte for compatibility with the old code and so that its tests pass.
It should probably be a separate change if we decide that unibyte is better here.
>> + json_out_t jo = {
>> + .maxdepth = 25,
>
> Is this arbitrary, or is it what JSON expects? If arbitrary, should
> it be customizable? should it be documented?
It's semi-arbitrary but reasonable: the JSON_checker at json.org uses a maximum depth of 20 by default, and many implementations use its test suite. RFC-8259 states that the maximum depth is implementation-dependent.
It's hardly worth making this into a parameter for the user to adjust but I'll clarify the code.
>> + /* FIXME: Do we really need to do all this work below to insert a string?
>> + Is there no function already written? I must be missing something. */
>
> There is no function. All the insert_from_* functions in insdel.c do
> something similar.
Thank you for confirming that. Looks like we could use some abstraction then.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-27 18:57 ` Mattias Engdegård
@ 2024-03-27 19:05 ` Eli Zaretskii
2024-03-28 20:59 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-27 19:05 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Wed, 27 Mar 2024 19:57:24 +0100
> Cc: Yuan Fu <casouri@gmail.com>,
> 70007@debbugs.gnu.org
>
> Eli, thank you for your comments!
Thanks for working on this in the first place.
> > This rejects unibyte non-ASCII strings, AFAU, in which case I suggest
> > to think whether we really want that. E.g., why is it wrong to encode
> > a string to UTF-8, and then send it to JSON?
>
> The way I see it, that would break the JSON abstraction: it transports strings of Unicode characters, not strings of bytes.
What's the difference? AFAIU, JSON expects UTF-8 encoded strings, and
whether that is used as a sequence of bytes or a sequence of
characters is in the eyes of the beholder: the bytestream is the same,
only the interpretation changes. So I'm not sure I understand how
this would break the assumption.
> A user who for some reason has a string of bytes that encode Unicode characters can just decode it in order to prove it to us. It's not the JSON encoder's job to decode the user's strings.
I didn't suggest to decode the input string, not at all. I suggested
to allow unibyte strings, and process them just like you process
pure-ASCII strings, leaving it to the caller to make sure the string
has only valid UTF-8 sequences. Forcing callers to decode such
strings is IMO too harsh and largely unjustified.
> (It would also be a pain to deal with and risks slowing down the string serialiser even if it's a case that never happens.)
I don't understand why. Once again, I'm just talking about passing
the bytes through as you do with ASCII characters.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-26 16:46 ` Eli Zaretskii
2024-03-27 12:46 ` Mattias Engdegård
@ 2024-03-28 19:16 ` Theodor Thornhill via Bug reports for GNU Emacs, the Swiss army knife of text editors
1 sibling, 0 replies; 37+ messages in thread
From: Theodor Thornhill via Bug reports for GNU Emacs, the Swiss army knife of text editors @ 2024-03-28 19:16 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Mattias Engdegård, 70007
Eli Zaretskii <eliz@gnu.org> writes:
>> From: Mattias Engdegård <mattias.engdegard@gmail.com>
>> Date: Tue, 26 Mar 2024 16:33:52 +0100
>>
>> If we replace the lisp-to-JSON encoder with native code, we would not need the jansson library for it and it would be faster.
>>
>> There is ongoing work on a JSON-to-lisp parser, but the author has made it clear that he does not have time to write an encoder, so I spent a morning mashing up the attached patch.
>
> Thanks for working on this.
>
>> It generally produces the same result as the old code, except:
>>
>> - The old code incorrectly accepted strings with non-Unicode characters (raw bytes). There is no reason to do this; JSON is UTF-8 only.
>
> Would it complicate the code not to reject raw bytes? I'd like to
> avoid incompatibilities if it's practical. Also, Emacs traditionally
> doesn't reject raw bytes, leaving that to the application or the user.
>
>> I'd be very happy if someone could test it with packages that use this interface (json-serialise, json-insert).
>
> Yes, please.
I've been using this along with the json-to-lisp parser for some time
now, and I'm really happy to see these improvements. Thanks a lot!
I haven't seen any issues thus far, and emacs is much more responsive.
I hope both of these patches will soon arrive on emacs 30.
I'll continue using and testing both until then.
Thanks,
Theo
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-27 19:05 ` Eli Zaretskii
@ 2024-03-28 20:59 ` Mattias Engdegård
2024-03-29 6:04 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-28 20:59 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
27 mars 2024 kl. 20.05 skrev Eli Zaretskii <eliz@gnu.org>:
>>> This rejects unibyte non-ASCII strings, AFAU, in which case I suggest
>>> to think whether we really want that. E.g., why is it wrong to encode
>>> a string to UTF-8, and then send it to JSON?
>>
>> The way I see it, that would break the JSON abstraction: it transports strings of Unicode characters, not strings of bytes.
>
> What's the difference? AFAIU, JSON expects UTF-8 encoded strings, and
> whether that is used as a sequence of bytes or a sequence of
> characters is in the eyes of the beholder: the bytestream is the same,
> only the interpretation changes.
Well no -- JSON transports Unicode strings: the JSON serialiser takes a Unicode string as input and outputs a byte sequence; the JSON parser takes a byte sequence and returns a Unicode string (assuming we are just interested in strings).
That the transport format uses UTF-8 is unrelated; if the user hands an encoded byte sequence to us then it seems more likely that it's a mistake. After all, it cannot have come from a received JSON message.
I think it was just another artefact of the old implementation. That code incorrectly used encode_string_utf_8 even on non-ASCII unibyte strings and trusted Jansson to validate the result. That resulted in a lot of wasted work and some strange strings getting accepted.
While it's theoretically possible that there are users with code relying on this behaviour, I can't find any evidence for it in the packages that I've looked at.
> I didn't suggest to decode the input string, not at all. I suggested
> to allow unibyte strings, and process them just like you process
> pure-ASCII strings, leaving it to the caller to make sure the string
> has only valid UTF-8 sequences.
Users of this raw-bytes-input feature (if they exist at all) previously had their input validated by Jansson. While mistakes would probably be detected at the other end I'm not sure it's a good idea.
> Forcing callers to decode such
> strings is IMO too harsh and largely unjustified.
We usually force them to do so in most other contexts. To take a random example, `princ` doesn't work with encoded strings. But it's rarely a problem.
Let's see how testing goes. We'll find a solution no matter what, pass-through or separate slow-path validation, if it turns out that we really need to after all.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-28 20:59 ` Mattias Engdegård
@ 2024-03-29 6:04 ` Eli Zaretskii
2024-03-30 11:41 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-29 6:04 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Thu, 28 Mar 2024 21:59:38 +0100
> Cc: casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> 27 mars 2024 kl. 20.05 skrev Eli Zaretskii <eliz@gnu.org>:
>
> >>> This rejects unibyte non-ASCII strings, AFAU, in which case I suggest
> >>> to think whether we really want that. E.g., why is it wrong to encode
> >>> a string to UTF-8, and then send it to JSON?
> >>
> >> The way I see it, that would break the JSON abstraction: it transports strings of Unicode characters, not strings of bytes.
> >
> > What's the difference? AFAIU, JSON expects UTF-8 encoded strings, and
> > whether that is used as a sequence of bytes or a sequence of
> > characters is in the eyes of the beholder: the bytestream is the same,
> > only the interpretation changes.
>
> Well no -- JSON transports Unicode strings: the JSON serialiser takes a Unicode string as input and outputs a byte sequence; the JSON parser takes a byte sequence and returns a Unicode string (assuming we are just interested in strings).
>
> That the transport format uses UTF-8 is unrelated;
It is not unrelated. A JSON stream is AFAIK supposed to have strings
represented in UTF-8 encoding. When a Lisp program produces a JSON
stream, all that should matter to it is that any string there has a
valid UTF-8 sequence; where and how that sequence was obtained is of
secondary importance.
> if the user hands an encoded byte sequence to us then it seems more likely that it's a mistake.
We don't know that. Since Emacs lets Lisp programs produce unibyte
UTF-8 encoded strings very easily, a program could do just that, for
whatever reasons. Unless we have very serious reasons not to allow
UTF-8 sequences produced by something other than the JSON serializer
itself (and I think we don't), we should not prohibit it. The Emacs
spirit is to let bad Lisp program enough rope to hang themselves if
that allows legitimate programs do their job more easily and flexibly.
> After all, it cannot have come from a received JSON message.
It could have, if it was encoded by the calling Lisp program. It
could also have been received from another source, in unibyte form
that is nonetheless valid UTF-8. If we force non-ASCII strings to be
multibyte, Lisp programs will be unable to take a unibyte UTF-8 string
received from an external source and plug it directly into an object
to be serialized into JSON; instead, they will have to decode the
string, then let the serializer encode it back -- a clear waste of CPU
cycles.
> I think it was just another artefact of the old implementation. That code incorrectly used encode_string_utf_8 even on non-ASCII unibyte strings and trusted Jansson to validate the result. That resulted in a lot of wasted work and some strange strings getting accepted.
I'm not talking about the old implementation. I was not completely
happy with it, either, and in particular with its insistence of
signaling errors due to encoding issues. I think this is not our
business in this case: the responsibility for submitting a valid UTF-8
sequence, when we get a unibyte string, is on the caller.
> While it's theoretically possible that there are users with code relying on this behaviour, I can't find any evidence for it in the packages that I've looked at.
Once again, my bother is not about some code that expects us to encode
UTF-8 byte sequences -- doing that is definitely not TRT. What I
would like to see is that unibyte strings are passed through
unchanged, so that valid UTF-8 strings will be okay, and invalid ones
will produce invalid JSON. This is better than signaling errors,
IMNSHO, and in particular is more in-line with how Emacs handles
unibyte strings elsewhere.
> > I didn't suggest to decode the input string, not at all. I suggested
> > to allow unibyte strings, and process them just like you process
> > pure-ASCII strings, leaving it to the caller to make sure the string
> > has only valid UTF-8 sequences.
>
> Users of this raw-bytes-input feature (if they exist at all) previously had their input validated by Jansson. While mistakes would probably be detected at the other end I'm not sure it's a good idea.
Why not? Once again, if we get a unibyte string, the onus is on the
caller to verify it's valid UTF-8, or suffer the consequences.
> > Forcing callers to decode such
> > strings is IMO too harsh and largely unjustified.
>
> We usually force them to do so in most other contexts. To take a random example, `princ` doesn't work with encoded strings. But it's rarely a problem.
There are many examples to the contrary. For example, primitives that
deal with file names can accept both multibyte and unibyte encoded
strings.
> Let's see how testing goes. We'll find a solution no matter what, pass-through or separate slow-path validation, if it turns out that we really need to after all.
OK. FTR, I'm not in favor of validation of unibyte strings, I just
suggest that we treat them as plain-ASCII: pass them through without
any validation, leaving the validation to the callers.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-29 6:04 ` Eli Zaretskii
@ 2024-03-30 11:41 ` Mattias Engdegård
2024-03-30 13:22 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-30 11:41 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
[-- Attachment #1: Type: text/plain, Size: 1068 bytes --]
29 mars 2024 kl. 07.04 skrev Eli Zaretskii <eliz@gnu.org>:
> OK. FTR, I'm not in favor of validation of unibyte strings, I just
> suggest that we treat them as plain-ASCII: pass them through without
> any validation, leaving the validation to the callers.
Actually we are more or less forced to validate unibyte strings as long as the serialiser returns multibyte. Which we agree that it probably shouldn't, but I'd first like to take some time to ensure that returning unibyte won't break anything.
Thank you for pushing the new JSON parser to master. I've rebased my patch and cleaned it up a bit, and it now removes all uses of Jansson from json.c. Since the change involves removing some Windows-specific code, perhaps you would like to check that it still compiles on that platform, if you have the time?
Otherwise I'll push it to master, and will remain ready to make any further adjustments necessary. We can then remove all remaining Jansson references (configuration, installation notes, etc), and make the required NEWS and manual changes.
[-- Attachment #2: 0001-New-JSON-encoder-bug-70007.patch --]
[-- Type: application/octet-stream, Size: 44367 bytes --]
From 4b2f5d8b55946c55838793046944b5680a19ba01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= <mattiase@acm.org>
Date: Tue, 26 Mar 2024 16:44:09 +0100
Subject: [PATCH] New JSON encoder (bug#70007)
It is in general at least 2x faster than the old encoder and does not
depend on any external library. Using our own code also gives us
control over translation details: for example, we now have full
bignum support and tighter float formatting.
* src/json.c (json_delete, json_initialized, init_json_functions)
(json_malloc, json_free, init_json, json_out_of_memory)
(json_release_object, check_string_without_embedded_nulls, json_check)
(json_check_utf8, lisp_to_json_nonscalar_1, lisp_to_json_nonscalar)
(lisp_to_json, json_available_p, ensure_json_available, json_insert)
(json_handle_nonlocal_exit, json_insert_callback):
Remove. Remaining uses updated.
* src/json.c (json_out_t, symset_t, struct symset_tbl)
(symset_size, make_symset_table, push_symset, pop_symset)
(cleanup_symset_tables, symset_hash, symset_expand, symset_add)
(json_out_grow_buf, cleanup_json_out, json_make_room, JSON_OUT_STR)
(json_out_str, json_out_byte, json_out_fixnum, string_not_unicode)
(json_plain_char, json_out_string, json_out_nest, json_out_unnest)
(json_out_object_cons, json_out_object_hash, json_out_array)
(json_out_float, json_out_bignum, json_out_something)
(json_out_to_string, json_serialize): New.
(Fjson_serialize, Fjson_insert):
New JSON encoder implementation.
* test/src/json-tests.el (json-serialize/object-with-duplicate-keys)
(json-serialize/string): Update tests.
---
src/emacs.c | 4 -
src/json.c | 1071 ++++++++++++++++++++--------------------
src/lisp.h | 1 -
src/print.c | 1 +
test/src/json-tests.el | 41 +-
5 files changed, 581 insertions(+), 537 deletions(-)
diff --git a/src/emacs.c b/src/emacs.c
index 87f12d3fa86..4a34bb06425 100644
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -2013,10 +2013,6 @@ main (int argc, char **argv)
init_random ();
init_xfaces ();
-#if defined HAVE_JSON && !defined WINDOWSNT
- init_json ();
-#endif
-
if (!initialized)
syms_of_comp ();
diff --git a/src/json.c b/src/json.c
index afc48c59d5a..711744138b8 100644
--- a/src/json.c
+++ b/src/json.c
@@ -25,189 +25,10 @@ Copyright (C) 2017-2024 Free Software Foundation, Inc.
#include <stdlib.h>
#include <math.h>
-#include <jansson.h>
-
#include "lisp.h"
#include "buffer.h"
#include "coding.h"
-#ifdef WINDOWSNT
-# include <windows.h>
-# include "w32common.h"
-# include "w32.h"
-
-DEF_DLL_FN (void, json_set_alloc_funcs,
- (json_malloc_t malloc_fn, json_free_t free_fn));
-DEF_DLL_FN (void, json_delete, (json_t *json));
-DEF_DLL_FN (json_t *, json_array, (void));
-DEF_DLL_FN (int, json_array_append_new, (json_t *array, json_t *value));
-DEF_DLL_FN (size_t, json_array_size, (const json_t *array));
-DEF_DLL_FN (json_t *, json_object, (void));
-DEF_DLL_FN (int, json_object_set_new,
- (json_t *object, const char *key, json_t *value));
-DEF_DLL_FN (json_t *, json_null, (void));
-DEF_DLL_FN (json_t *, json_true, (void));
-DEF_DLL_FN (json_t *, json_false, (void));
-DEF_DLL_FN (json_t *, json_integer, (json_int_t value));
-DEF_DLL_FN (json_t *, json_real, (double value));
-DEF_DLL_FN (json_t *, json_stringn, (const char *value, size_t len));
-DEF_DLL_FN (char *, json_dumps, (const json_t *json, size_t flags));
-DEF_DLL_FN (int, json_dump_callback,
- (const json_t *json, json_dump_callback_t callback, void *data,
- size_t flags));
-DEF_DLL_FN (json_t *, json_object_get, (const json_t *object, const char *key));
-
-/* This is called by json_decref, which is an inline function. */
-void json_delete(json_t *json)
-{
- fn_json_delete (json);
-}
-
-static bool json_initialized;
-
-static bool
-init_json_functions (void)
-{
- HMODULE library = w32_delayed_load (Qjson);
-
- if (!library)
- return false;
-
- LOAD_DLL_FN (library, json_set_alloc_funcs);
- LOAD_DLL_FN (library, json_delete);
- LOAD_DLL_FN (library, json_array);
- LOAD_DLL_FN (library, json_array_append_new);
- LOAD_DLL_FN (library, json_array_size);
- LOAD_DLL_FN (library, json_object);
- LOAD_DLL_FN (library, json_object_set_new);
- LOAD_DLL_FN (library, json_null);
- LOAD_DLL_FN (library, json_true);
- LOAD_DLL_FN (library, json_false);
- LOAD_DLL_FN (library, json_integer);
- LOAD_DLL_FN (library, json_real);
- LOAD_DLL_FN (library, json_stringn);
- LOAD_DLL_FN (library, json_dumps);
- LOAD_DLL_FN (library, json_dump_callback);
- LOAD_DLL_FN (library, json_object_get);
-
- init_json ();
-
- return true;
-}
-
-#define json_set_alloc_funcs fn_json_set_alloc_funcs
-#define json_array fn_json_array
-#define json_array_append_new fn_json_array_append_new
-#define json_array_size fn_json_array_size
-#define json_object fn_json_object
-#define json_object_set_new fn_json_object_set_new
-#define json_null fn_json_null
-#define json_true fn_json_true
-#define json_false fn_json_false
-#define json_integer fn_json_integer
-#define json_real fn_json_real
-#define json_stringn fn_json_stringn
-#define json_dumps fn_json_dumps
-#define json_dump_callback fn_json_dump_callback
-#define json_object_get fn_json_object_get
-
-#endif /* WINDOWSNT */
-
-/* We install a custom allocator so that we can avoid objects larger
- than PTRDIFF_MAX. Such objects wouldn't play well with the rest of
- Emacs's codebase, which generally uses ptrdiff_t for sizes and
- indices. The other functions in this file also generally assume
- that size_t values never exceed PTRDIFF_MAX.
-
- In addition, we need to use a custom allocator because on
- MS-Windows we replace malloc/free with our own functions, see
- w32heap.c, so we must force the library to use our allocator, or
- else we won't be able to free storage allocated by the library. */
-
-static void *
-json_malloc (size_t size)
-{
- if (size > PTRDIFF_MAX)
- {
- errno = ENOMEM;
- return NULL;
- }
- return malloc (size);
-}
-
-static void
-json_free (void *ptr)
-{
- free (ptr);
-}
-
-void
-init_json (void)
-{
- json_set_alloc_funcs (json_malloc, json_free);
-}
-
-/* Note that all callers of make_string_from_utf8 and build_string_from_utf8
- below either pass only value UTF-8 strings or use the functionf for
- formatting error messages; in the latter case correctness isn't
- critical. */
-
-/* Return a unibyte string containing the sequence of UTF-8 encoding
- units of the UTF-8 representation of STRING. If STRING does not
- represent a sequence of Unicode scalar values, return a string with
- unspecified contents. */
-
-static Lisp_Object
-json_encode (Lisp_Object string)
-{
- /* FIXME: Raise an error if STRING is not a scalar value
- sequence. */
- return encode_string_utf_8 (string, Qnil, false, Qt, Qt);
-}
-
-static AVOID
-json_out_of_memory (void)
-{
- xsignal0 (Qjson_out_of_memory);
-}
-
-static void
-json_release_object (void *object)
-{
- json_decref (object);
-}
-
-/* Signal an error if OBJECT is not a string, or if OBJECT contains
- embedded null characters. */
-
-static void
-check_string_without_embedded_nulls (Lisp_Object object)
-{
- CHECK_STRING (object);
- CHECK_TYPE (memchr (SDATA (object), '\0', SBYTES (object)) == NULL,
- Qstring_without_embedded_nulls_p, object);
-}
-
-/* Signal an error of type `json-out-of-memory' if OBJECT is
- NULL. */
-
-static json_t *
-json_check (json_t *object)
-{
- if (object == NULL)
- json_out_of_memory ();
- return object;
-}
-
-/* If STRING is not a valid UTF-8 string, signal an error of type
- `wrong-type-argument'. STRING must be a unibyte string. */
-
-static void
-json_check_utf8 (Lisp_Object string)
-{
- CHECK_TYPE (utf8_string_p (string), Qutf_8_string_p, string);
-}
-
enum json_object_type {
json_object_hashtable,
json_object_alist,
@@ -226,179 +47,6 @@ json_check_utf8 (Lisp_Object string)
Lisp_Object false_object;
};
-static json_t *lisp_to_json (Lisp_Object,
- const struct json_configuration *conf);
-
-/* Convert a Lisp object to a nonscalar JSON object (array or object). */
-
-static json_t *
-lisp_to_json_nonscalar_1 (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- json_t *json;
- specpdl_ref count;
-
- if (VECTORP (lisp))
- {
- ptrdiff_t size = ASIZE (lisp);
- json = json_check (json_array ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- for (ptrdiff_t i = 0; i < size; ++i)
- {
- int status
- = json_array_append_new (json, lisp_to_json (AREF (lisp, i),
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- eassert (json_array_size (json) == size);
- }
- else if (HASH_TABLE_P (lisp))
- {
- struct Lisp_Hash_Table *h = XHASH_TABLE (lisp);
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- DOHASH (h, key, v)
- {
- CHECK_STRING (key);
- Lisp_Object ekey = json_encode (key);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (ekey);
- const char *key_str = SSDATA (ekey);
- /* Reject duplicate keys. These are possible if the hash
- table test is not `equal'. */
- if (json_object_get (json, key_str) != NULL)
- wrong_type_argument (Qjson_value_p, lisp);
- int status
- = json_object_set_new (json, key_str,
- lisp_to_json (v, conf));
- if (status == -1)
- {
- /* A failure can be caused either by an invalid key or
- by low memory. */
- json_check_utf8 (ekey);
- json_out_of_memory ();
- }
- }
- }
- else if (NILP (lisp))
- return json_check (json_object ());
- else if (CONSP (lisp))
- {
- Lisp_Object tail = lisp;
- json = json_check (json_object ());
- count = SPECPDL_INDEX ();
- record_unwind_protect_ptr (json_release_object, json);
- bool is_plist = !CONSP (XCAR (tail));
- FOR_EACH_TAIL (tail)
- {
- const char *key_str;
- Lisp_Object value;
- Lisp_Object key_symbol;
- if (is_plist)
- {
- key_symbol = XCAR (tail);
- tail = XCDR (tail);
- CHECK_CONS (tail);
- value = XCAR (tail);
- }
- else
- {
- Lisp_Object pair = XCAR (tail);
- CHECK_CONS (pair);
- key_symbol = XCAR (pair);
- value = XCDR (pair);
- }
- CHECK_SYMBOL (key_symbol);
- Lisp_Object key = SYMBOL_NAME (key_symbol);
- /* We can't specify the length, so the string must be
- null-terminated. */
- check_string_without_embedded_nulls (key);
- key_str = SSDATA (key);
- /* In plists, ensure leading ":" in keys is stripped. It
- will be reconstructed later in `json_to_lisp'.*/
- if (is_plist && ':' == key_str[0] && key_str[1])
- {
- key_str = &key_str[1];
- }
- /* Only add element if key is not already present. */
- if (json_object_get (json, key_str) == NULL)
- {
- int status
- = json_object_set_new (json, key_str, lisp_to_json (value,
- conf));
- if (status == -1)
- json_out_of_memory ();
- }
- }
- CHECK_LIST_END (tail, lisp);
- }
- else
- wrong_type_argument (Qjson_value_p, lisp);
-
- clear_unwind_protect (count);
- unbind_to (count, Qnil);
- return json;
-}
-
-/* Convert LISP to a nonscalar JSON object (array or object). Signal
- an error of type `wrong-type-argument' if LISP is not a vector,
- hashtable, alist, or plist. */
-
-static json_t *
-lisp_to_json_nonscalar (Lisp_Object lisp,
- const struct json_configuration *conf)
-{
- if (++lisp_eval_depth > max_lisp_eval_depth)
- xsignal0 (Qjson_object_too_deep);
- json_t *json = lisp_to_json_nonscalar_1 (lisp, conf);
- --lisp_eval_depth;
- return json;
-}
-
-/* Convert LISP to any JSON object. Signal an error of type
- `wrong-type-argument' if the type of LISP can't be converted to a
- JSON object. */
-
-static json_t *
-lisp_to_json (Lisp_Object lisp, const struct json_configuration *conf)
-{
- if (EQ (lisp, conf->null_object))
- return json_check (json_null ());
- else if (EQ (lisp, conf->false_object))
- return json_check (json_false ());
- else if (EQ (lisp, Qt))
- return json_check (json_true ());
- else if (INTEGERP (lisp))
- {
- intmax_t low = TYPE_MINIMUM (json_int_t);
- intmax_t high = TYPE_MAXIMUM (json_int_t);
- intmax_t value = check_integer_range (lisp, low, high);
- return json_check (json_integer (value));
- }
- else if (FLOATP (lisp))
- return json_check (json_real (XFLOAT_DATA (lisp)));
- else if (STRINGP (lisp))
- {
- Lisp_Object encoded = json_encode (lisp);
- json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
- if (json == NULL)
- {
- /* A failure can be caused either by an invalid string or by
- low memory. */
- json_check_utf8 (encoded);
- json_out_of_memory ();
- }
- return json;
- }
-
- /* LISP now must be a vector, hashtable, alist, or plist. */
- return lisp_to_json_nonscalar (lisp, conf);
-}
-
static void
json_parse_args (ptrdiff_t nargs,
Lisp_Object *args,
@@ -450,158 +98,533 @@ json_parse_args (ptrdiff_t nargs,
}
}
-static bool
-json_available_p (void)
+/* FIXME: Remove completely. */
+DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL,
+ doc: /* Return non-nil if libjansson is available (internal use only). */)
+ (void)
{
-#ifdef WINDOWSNT
- if (!json_initialized)
- {
- Lisp_Object status;
- json_initialized = init_json_functions ();
- status = json_initialized ? Qt : Qnil;
- Vlibrary_cache = Fcons (Fcons (Qjson, status), Vlibrary_cache);
- }
- return json_initialized;
-#else /* !WINDOWSNT */
- return true;
-#endif
+ return Qt;
}
-#ifdef WINDOWSNT
+/* JSON encoding context. */
+typedef struct {
+ char *buf;
+ ptrdiff_t size; /* number of bytes in buf */
+ ptrdiff_t capacity; /* allocated size of buf */
+ ptrdiff_t chars_delta; /* size - {number of characters in buf} */
+
+ int maxdepth;
+ struct symset_tbl *ss_table; /* table used by containing object */
+ struct json_configuration conf;
+} json_out_t;
+
+/* Set of symbols. */
+typedef struct {
+ ptrdiff_t count; /* symbols in table */
+ int bits; /* log2(table size) */
+ struct symset_tbl *table; /* heap-allocated table */
+} symset_t;
+
+struct symset_tbl
+{
+ /* Table used by the containing object if any, so that we can free all
+ tables if an error occurs. */
+ struct symset_tbl *up;
+ /* Table of symbols (2**bits elements), Qunbound where unused. */
+ Lisp_Object entries[];
+};
+
+static inline ptrdiff_t
+symset_size (int bits)
+{
+ return (ptrdiff_t)1 << bits;
+}
+
+static struct symset_tbl *
+make_symset_table (int bits, struct symset_tbl *up)
+{
+ int maxbits = min (SIZE_WIDTH - 2 - (word_size < 8 ? 2 : 3), 32);
+ if (bits > maxbits)
+ error ("out of memory"); /* Will never happen in practice. */
+ struct symset_tbl *st = xnmalloc (sizeof *st->entries << bits, sizeof *st);
+ st->up = up;
+ ptrdiff_t size = symset_size (bits);
+ for (ptrdiff_t i = 0; i < size; i++)
+ st->entries[i] = Qunbound;
+ return st;
+}
+
+/* Create a new symset to use for a new object. */
+static symset_t
+push_symset (json_out_t *jo)
+{
+ int bits = 4;
+ struct symset_tbl *tbl = make_symset_table (bits, jo->ss_table);
+ jo->ss_table = tbl;
+ return (symset_t){ .count = 0, .bits = bits, .table = tbl };
+}
+
+/* Destroy the current symset. */
static void
-ensure_json_available (void)
+pop_symset (json_out_t *jo, symset_t *ss)
{
- if (!json_available_p ())
- Fsignal (Qjson_unavailable,
- list1 (build_unibyte_string ("jansson library not found")));
+ jo->ss_table = ss->table->up;
+ xfree (ss->table);
}
-#endif
-DEFUN ("json--available-p", Fjson__available_p, Sjson__available_p, 0, 0, NULL,
- doc: /* Return non-nil if libjansson is available (internal use only). */)
- (void)
+/* Remove all heap-allocated symset tables, in case an error occurred. */
+static void
+cleanup_symset_tables (struct symset_tbl *st)
{
- return json_available_p () ? Qt : Qnil;
+ while (st)
+ {
+ struct symset_tbl *up = st->up;
+ xfree (st);
+ st = up;
+ }
}
-DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
- NULL,
- doc: /* Return the JSON representation of OBJECT as a string.
+static inline uint32_t
+symset_hash (Lisp_Object sym, int bits)
+{
+ return knuth_hash (reduce_emacs_uint_to_hash_hash (XHASH (sym)), bits);
+}
-OBJECT must be t, a number, string, vector, hashtable, alist, plist,
-or the Lisp equivalents to the JSON null and false values, and its
-elements must recursively consist of the same kinds of values. t will
-be converted to the JSON true value. Vectors will be converted to
-JSON arrays, whereas hashtables, alists and plists are converted to
-JSON objects. Hashtable keys must be strings without embedded null
-characters and must be unique within each object. Alist and plist
-keys must be symbols; if a key is duplicate, the first instance is
-used.
+/* Enlarge the table used by a symset. */
+static NO_INLINE void
+symset_expand (symset_t *ss)
+{
+ struct symset_tbl *old_table = ss->table;
+ int oldbits = ss->bits;
+ ptrdiff_t oldsize = symset_size (oldbits);
+ int bits = oldbits + 1;
+ ss->bits = bits;
+ ss->table = make_symset_table (bits, old_table->up);
+ /* Move all entries from the old table to the new one. */
+ ptrdiff_t mask = symset_size (bits) - 1;
+ struct symset_tbl *tbl = ss->table;
+ for (ptrdiff_t i = 0; i < oldsize; i++)
+ {
+ Lisp_Object sym = old_table->entries[i];
+ if (!BASE_EQ (sym, Qunbound))
+ {
+ ptrdiff_t j = symset_hash (sym, bits);
+ while (!BASE_EQ (tbl->entries[j], Qunbound))
+ j = (j + 1) & mask;
+ tbl->entries[j] = sym;
+ }
+ }
+ xfree (old_table);
+}
-The Lisp equivalents to the JSON null and false values are
-configurable in the arguments ARGS, a list of keyword/argument pairs:
+/* If sym is in ss, return false; otherwise add it and return true.
+ Comparison is done by strict identity. */
+static inline bool
+symset_add (json_out_t *jo, symset_t *ss, Lisp_Object sym)
+{
+ /* Make sure we don't fill more than half of the table. */
+ if (ss->count >= (symset_size (ss->bits) >> 1))
+ {
+ symset_expand (ss);
+ jo->ss_table = ss->table;
+ }
-The keyword argument `:null-object' specifies which object to use
-to represent a JSON null value. It defaults to `:null'.
+ struct symset_tbl *tbl = ss->table;
+ ptrdiff_t mask = symset_size (ss->bits) - 1;
+ for (ptrdiff_t i = symset_hash (sym, ss->bits); ; i = (i + 1) & mask)
+ {
+ Lisp_Object s = tbl->entries[i];
+ if (BASE_EQ (s, sym))
+ return false; /* Previous occurrence found. */
+ if (BASE_EQ (s, Qunbound))
+ {
+ /* Not in set, add it. */
+ tbl->entries[i] = sym;
+ ss->count++;
+ return true;
+ }
+ }
+}
-The keyword argument `:false-object' specifies which object to use to
-represent a JSON false value. It defaults to `:false'.
+static NO_INLINE void
+json_out_grow_buf (json_out_t *jo, ptrdiff_t bytes)
+{
+ ptrdiff_t need = jo->size + bytes;
+ ptrdiff_t new_size = max (jo->capacity, 512);
+ while (new_size < need)
+ new_size <<= 1;
+ jo->buf = xrealloc (jo->buf, new_size);
+ jo->capacity = new_size;
+}
-In you specify the same value for `:null-object' and `:false-object',
-a potentially ambiguous situation, the JSON output will not contain
-any JSON false values.
-usage: (json-serialize OBJECT &rest ARGS) */)
- (ptrdiff_t nargs, Lisp_Object *args)
+static void
+cleanup_json_out (void *arg)
{
- specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t *jo = arg;
+ xfree (jo->buf);
+ jo->buf = NULL;
+ cleanup_symset_tables (jo->ss_table);
+}
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
+/* Make room for `bytes` more bytes in buffer. */
+static void
+json_make_room (json_out_t *jo, ptrdiff_t bytes)
+{
+ if (bytes > jo->capacity - jo->size)
+ json_out_grow_buf (jo, bytes);
+}
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
+#define JSON_OUT_STR(jo, str) (json_out_str (jo, str, sizeof (str) - 1))
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
+/* Add `bytes` bytes from `str` to the buffer. */
+static void
+json_out_str (json_out_t *jo, const char *str, size_t bytes)
+{
+ json_make_room (jo, bytes);
+ memcpy (jo->buf + jo->size, str, bytes);
+ jo->size += bytes;
+}
- char *string = json_dumps (json, JSON_COMPACT | JSON_ENCODE_ANY);
- if (string == NULL)
- json_out_of_memory ();
- record_unwind_protect_ptr (json_free, string);
+static void
+json_out_byte (json_out_t *jo, unsigned char c)
+{
+ json_make_room (jo, 1);
+ jo->buf[jo->size++] = c;
+}
- return unbind_to (count, build_string_from_utf8 (string));
+static void
+json_out_fixnum (json_out_t *jo, EMACS_INT x)
+{
+ char buf[INT_BUFSIZE_BOUND (EMACS_INT)];
+ char *end = buf + sizeof buf;
+ char *p = fixnum_to_string (x, buf, end);
+ json_out_str (jo, p, end - p);
}
-struct json_buffer_and_size
+static AVOID
+string_not_unicode (Lisp_Object obj)
{
- const char *buffer;
- ptrdiff_t size;
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
+ /* FIXME: this is just for compatibility with existing tests, it's not
+ a very descriptive error. */
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static const unsigned char json_plain_char[256] = {
+ /* 32 chars/line: 1 for printable ASCII + DEL except " and \, 0 elsewhere */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 00-1f */
+ 1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 20-3f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1, /* 40-5f */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 60-7f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 80-9f */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* a0-bf */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* c0-df */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* e0-ff */
};
-static Lisp_Object
-json_insert (void *data)
+static void
+json_out_string (json_out_t *jo, Lisp_Object str, int skip)
+{
+ /* FIXME: this code is slow, make faster! */
+
+ static const char hexchar[16] = "0123456789ABCDEF";
+ ptrdiff_t len = SBYTES (str);
+ json_make_room (jo, len + 2);
+ json_out_byte (jo, '"');
+ unsigned char *p = SDATA (str);
+ unsigned char *end = p + len;
+ p += skip;
+ while (p < end)
+ {
+ unsigned char c = *p;
+ if (json_plain_char[c])
+ {
+ json_out_byte (jo, c);
+ p++;
+ }
+ else if (c > 0x7f)
+ {
+ if (STRING_MULTIBYTE (str))
+ {
+ int n;
+ if (c <= 0xc1)
+ string_not_unicode (str);
+ if (c <= 0xdf)
+ n = 2;
+ else if (c <= 0xef)
+ {
+ int v = (((c & 0x0f) << 12)
+ + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f));
+ if (char_surrogate_p (v))
+ string_not_unicode (str);
+ n = 3;
+ }
+ else if (c <= 0xf7)
+ {
+ int v = (((c & 0x07) << 18)
+ + ((p[1] & 0x3f) << 12)
+ + ((p[2] & 0x3f) << 6)
+ + (p[3] & 0x3f));
+ if (v > MAX_UNICODE_CHAR)
+ string_not_unicode (str);
+ n = 4;
+ }
+ else
+ string_not_unicode (str);
+ json_out_str (jo, (const char *)p, n);
+ jo->chars_delta += n - 1;
+ p += n;
+ }
+ else
+ string_not_unicode (str);
+ }
+ else
+ {
+ json_out_byte (jo, '\\');
+ switch (c)
+ {
+ case '"':
+ case '\\': json_out_byte (jo, c); break;
+ case '\b': json_out_byte (jo, 'b'); break;
+ case '\t': json_out_byte (jo, 't'); break;
+ case '\n': json_out_byte (jo, 'n'); break;
+ case '\f': json_out_byte (jo, 'f'); break;
+ case '\r': json_out_byte (jo, 'r'); break;
+ default:
+ {
+ char hex[5] = { 'u', '0', '0',
+ hexchar[c >> 4], hexchar[c & 0xf] };
+ json_out_str (jo, hex, 5);
+ break;
+ }
+ }
+ p++;
+ }
+ }
+ json_out_byte (jo, '"');
+}
+
+static void
+json_out_nest (json_out_t *jo)
+{
+ --jo->maxdepth;
+ if (jo->maxdepth < 0)
+ error ("Maximum JSON serialisation depth exceeded");
+}
+
+static void
+json_out_unnest (json_out_t *jo)
{
- struct json_buffer_and_size *buffer_and_size = data;
- ptrdiff_t len = buffer_and_size->size;
- ptrdiff_t inserted_bytes = buffer_and_size->inserted_bytes;
- ptrdiff_t gap_size = GAP_SIZE - inserted_bytes;
+ ++jo->maxdepth;
+}
- /* Enlarge the gap if necessary. */
- if (gap_size < len)
- make_gap (len - gap_size);
+static void json_out_something (json_out_t *jo, Lisp_Object obj);
- /* Copy this chunk of data into the gap. */
- memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE + inserted_bytes,
- buffer_and_size->buffer, len);
- buffer_and_size->inserted_bytes += len;
- return Qnil;
+static void
+json_out_object_cons (json_out_t *jo, Lisp_Object obj)
+{
+ json_out_nest (jo);
+ symset_t ss = push_symset (jo);
+ json_out_byte (jo, '{');
+ bool is_alist = CONSP (XCAR (obj));
+ bool first = true;
+ Lisp_Object tail = obj;
+ FOR_EACH_TAIL (tail)
+ {
+ Lisp_Object key;
+ Lisp_Object value;
+ if (is_alist)
+ {
+ Lisp_Object pair = XCAR (tail);
+ CHECK_CONS (pair);
+ key = XCAR (pair);
+ value = XCDR (pair);
+ }
+ else
+ {
+ key = XCAR (tail);
+ tail = XCDR (tail);
+ CHECK_CONS (tail);
+ value = XCAR (tail);
+ }
+ key = maybe_remove_pos_from_symbol (key);
+ CHECK_TYPE (BARE_SYMBOL_P (key), Qsymbolp, key);
+
+ if (symset_add (jo, &ss, key))
+ {
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+
+ Lisp_Object key_str = SYMBOL_NAME (key);
+ const char *str = SSDATA (key_str);
+ /* Skip leading ':' in plist keys. */
+ int skip = !is_alist && str[0] == ':' && str[1] ? 1 : 0;
+ json_out_string (jo, key_str, skip);
+ json_out_byte (jo, ':');
+ json_out_something (jo, value);
+ }
+ }
+ CHECK_LIST_END (tail, obj);
+ json_out_byte (jo, '}');
+ pop_symset (jo, &ss);
+ json_out_unnest (jo);
}
-static Lisp_Object
-json_handle_nonlocal_exit (enum nonlocal_exit type, Lisp_Object data)
+static void
+json_out_object_hash (json_out_t *jo, Lisp_Object obj)
{
- switch (type)
+ json_out_nest (jo);
+ json_out_byte (jo, '{');
+ struct Lisp_Hash_Table *h = XHASH_TABLE (obj);
+ bool first = true;
+ DOHASH (h, k, v)
{
- case NONLOCAL_EXIT_SIGNAL:
- return data;
- case NONLOCAL_EXIT_THROW:
- return Fcons (Qno_catch, data);
- default:
- eassume (false);
+ if (!first)
+ json_out_byte (jo, ',');
+ first = false;
+ CHECK_STRING (k);
+ /* It's the user's responsibility to ensure that hash keys are
+ unique; we don't check for it. */
+ json_out_string (jo, k, 0);
+ json_out_byte (jo, ':');
+ json_out_something (jo, v);
}
+ json_out_byte (jo, '}');
+ json_out_unnest (jo);
+
}
-struct json_insert_data
+static void
+json_out_array (json_out_t *jo, Lisp_Object obj)
{
- /* This tracks how many bytes were inserted by the callback since
- json_dump_callback was called. */
- ptrdiff_t inserted_bytes;
- /* nil if json_insert succeeded, otherwise the symbol
- Qcatch_all_memory_full or a cons (ERROR-SYMBOL . ERROR-DATA). */
- Lisp_Object error;
-};
+ json_out_nest (jo);
+ json_out_byte (jo, '[');
+ ptrdiff_t n = ASIZE (obj);
+ for (ptrdiff_t i = 0; i < n; i++)
+ {
+ if (i > 0)
+ json_out_byte (jo, ',');
+ json_out_something (jo, AREF (obj, i));
+ }
+ json_out_byte (jo, ']');
+ json_out_unnest (jo);
+}
-/* Callback for json_dump_callback that inserts a JSON representation
- as a unibyte string into the gap. DATA must point to a structure
- of type json_insert_data. This function may not exit nonlocally.
- It catches all nonlocal exits and stores them in data->error for
- reraising. */
+static void
+json_out_float (json_out_t *jo, Lisp_Object f)
+{
+ double x = XFLOAT_DATA (f);
+ if (isinf (x) || isnan (x))
+ signal_error ("not a finite number", f);
+ /* As luck has it, float_to_string emits correct JSON float syntax for
+ all numbers (because Vfloat_output_format is Qnil). */
+ json_make_room (jo, FLOAT_TO_STRING_BUFSIZE);
+ int n = float_to_string (jo->buf + jo->size, x);
+ jo->size += n;
+}
-static int
-json_insert_callback (const char *buffer, size_t size, void *data)
+static void
+json_out_bignum (json_out_t *jo, Lisp_Object x)
{
- struct json_insert_data *d = data;
- struct json_buffer_and_size buffer_and_size
- = {.buffer = buffer, .size = size, .inserted_bytes = d->inserted_bytes};
- d->error = internal_catch_all (json_insert, &buffer_and_size,
- json_handle_nonlocal_exit);
- d->inserted_bytes = buffer_and_size.inserted_bytes;
- return NILP (d->error) ? 0 : -1;
+ int base = 10;
+ ptrdiff_t size = bignum_bufsize (x, base);
+ json_make_room (jo, size);
+ int n = bignum_to_c_string (jo->buf + jo->size, size, x, base);
+ jo->size += n;
+}
+
+static void
+json_out_something (json_out_t *jo, Lisp_Object obj)
+{
+ if (EQ (obj, jo->conf.null_object))
+ JSON_OUT_STR (jo, "null");
+ else if (EQ (obj, jo->conf.false_object))
+ JSON_OUT_STR (jo, "false");
+ else if (EQ (obj, Qt))
+ JSON_OUT_STR (jo, "true");
+ else if (NILP (obj))
+ JSON_OUT_STR (jo, "{}");
+ else if (FIXNUMP (obj))
+ json_out_fixnum (jo, XFIXNUM (obj));
+ else if (STRINGP (obj))
+ json_out_string (jo, obj, 0);
+ else if (CONSP (obj))
+ json_out_object_cons (jo, obj);
+ else if (FLOATP (obj))
+ json_out_float (jo, obj);
+ else if (HASH_TABLE_P (obj))
+ json_out_object_hash (jo, obj);
+ else if (VECTORP (obj))
+ json_out_array (jo, obj);
+ else if (BIGNUMP (obj))
+ json_out_bignum (jo, obj);
+ else
+ wrong_type_argument (Qjson_value_p, obj);
+}
+
+static Lisp_Object
+json_out_to_string (json_out_t *jo)
+{
+ /* FIXME: should this be a unibyte or multibyte string?
+ Right now we make a multibyte string for test compatibility,
+ but we are really encoding so unibyte would make more sense. */
+ ptrdiff_t nchars = jo->size - jo->chars_delta;
+ return make_multibyte_string (jo->buf, nchars, jo->size);
+}
+
+static void
+json_serialize (json_out_t *jo, Lisp_Object object,
+ ptrdiff_t nargs, Lisp_Object *args)
+{
+ *jo = (json_out_t) {
+ /* The maximum nesting depth allowed should be sufficient for most
+ uses but could be raised if necessary. (The default maximum
+ depth for JSON_checker is 20.) */
+ .maxdepth = 50,
+ .conf = {json_object_hashtable, json_array_array, QCnull, QCfalse}
+ };
+ json_parse_args (nargs, args, &jo->conf, false);
+ record_unwind_protect_ptr (cleanup_json_out, jo);
+
+ /* Make float conversion independent of float-output-format. */
+ if (!NILP (Vfloat_output_format))
+ specbind (Qfloat_output_format, Qnil);
+
+ json_out_something (jo, object);
+}
+
+DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, MANY,
+ NULL,
+ doc: /* Return the JSON representation of OBJECT as a string.
+
+OBJECT must be t, a number, string, vector, hashtable, alist, plist,
+or the Lisp equivalents to the JSON null and false values, and its
+elements must recursively consist of the same kinds of values. t will
+be converted to the JSON true value. Vectors will be converted to
+JSON arrays, whereas hashtables, alists and plists are converted to
+JSON objects. Hashtable keys must be strings, unique within each object.
+Alist and plist keys must be symbols; if a key is duplicate, the first
+instance is used. A leading colon in plist keys is elided.
+
+The Lisp equivalents to the JSON null and false values are
+configurable in the arguments ARGS, a list of keyword/argument pairs:
+
+The keyword argument `:null-object' specifies which object to use
+to represent a JSON null value. It defaults to `:null'.
+
+The keyword argument `:false-object' specifies which object to use to
+represent a JSON false value. It defaults to `:false'.
+
+If you specify the same value for `:null-object' and `:false-object',
+a potentially ambiguous situation, the JSON output will not contain
+any JSON false values.
+usage: (json-serialize OBJECT &rest ARGS) */)
+ (ptrdiff_t nargs, Lisp_Object *args)
+{
+ specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t jo;
+ json_serialize (&jo, args[0], nargs - 1, args + 1);
+ return unbind_to (count, json_out_to_string (&jo));
}
DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
@@ -614,71 +637,52 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
(ptrdiff_t nargs, Lisp_Object *args)
{
specpdl_ref count = SPECPDL_INDEX ();
+ json_out_t jo;
+ json_serialize (&jo, args[0], nargs - 1, args + 1);
-#ifdef WINDOWSNT
- ensure_json_available ();
-#endif
-
- struct json_configuration conf =
- {json_object_hashtable, json_array_array, QCnull, QCfalse};
- json_parse_args (nargs - 1, args + 1, &conf, false);
-
- json_t *json = lisp_to_json (args[0], &conf);
- record_unwind_protect_ptr (json_release_object, json);
+ /* FIXME: All the work below just to insert a string into a buffer? */
prepare_to_modify_buffer (PT, PT, NULL);
move_gap_both (PT, PT_BYTE);
- struct json_insert_data data;
- data.inserted_bytes = 0;
- /* Could have used json_dumpb, but that became available only in
- Jansson 2.10, whereas we want to support 2.7 and upward. */
- int status = json_dump_callback (json, json_insert_callback, &data,
- JSON_COMPACT | JSON_ENCODE_ANY);
- if (status == -1)
- {
- if (CONSP (data.error))
- xsignal (XCAR (data.error), XCDR (data.error));
- else
- json_out_of_memory ();
- }
+ if (GAP_SIZE < jo.size)
+ make_gap (jo.size - GAP_SIZE);
+ memcpy ((char *) BEG_ADDR + PT_BYTE - BEG_BYTE, jo.buf, jo.size);
+
+ /* No need to keep allocation beyond this point. */
+ unbind_to (count, Qnil);
ptrdiff_t inserted = 0;
- ptrdiff_t inserted_bytes = data.inserted_bytes;
- if (inserted_bytes > 0)
+ ptrdiff_t inserted_bytes = jo.size;
+
+ /* If required, decode the stuff we've read into the gap. */
+ struct coding_system coding;
+ /* JSON strings are UTF-8 encoded strings. */
+ setup_coding_system (Qutf_8_unix, &coding);
+ coding.dst_multibyte = !NILP (BVAR (current_buffer,
+ enable_multibyte_characters));
+ if (CODING_MAY_REQUIRE_DECODING (&coding))
{
- /* If required, decode the stuff we've read into the gap. */
- struct coding_system coding;
- /* JSON strings are UTF-8 encoded strings. If for some reason
- the text returned by the Jansson library includes invalid
- byte sequences, they will be represented by raw bytes in the
- buffer text. */
- setup_coding_system (Qutf_8_unix, &coding);
- coding.dst_multibyte =
- !NILP (BVAR (current_buffer, enable_multibyte_characters));
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- /* Now we have all the new bytes at the beginning of the gap,
- but `decode_coding_gap` needs them at the end of the gap, so
- we need to move them. */
- memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
- decode_coding_gap (&coding, inserted_bytes);
- inserted = coding.produced_char;
- }
- else
- {
- /* Make the inserted text part of the buffer, as unibyte text. */
- eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
- insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
-
- /* The target buffer is unibyte, so we don't need to decode. */
- invalidate_buffer_caches (current_buffer,
- PT, PT + inserted_bytes);
- adjust_after_insert (PT, PT_BYTE,
- PT + inserted_bytes,
- PT_BYTE + inserted_bytes,
- inserted_bytes);
- inserted = inserted_bytes;
- }
+ /* Now we have all the new bytes at the beginning of the gap,
+ but `decode_coding_gap` needs them at the end of the gap, so
+ we need to move them. */
+ memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
+ decode_coding_gap (&coding, inserted_bytes);
+ inserted = coding.produced_char;
+ }
+ else
+ {
+ /* Make the inserted text part of the buffer, as unibyte text. */
+ eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
+
+ /* The target buffer is unibyte, so we don't need to decode. */
+ invalidate_buffer_caches (current_buffer,
+ PT, PT + inserted_bytes);
+ adjust_after_insert (PT, PT_BYTE,
+ PT + inserted_bytes,
+ PT_BYTE + inserted_bytes,
+ inserted_bytes);
+ inserted = inserted_bytes;
}
/* Call after-change hooks. */
@@ -690,7 +694,26 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
SET_PT_BOTH (PT + inserted, PT_BYTE + inserted_bytes);
}
- return unbind_to (count, Qnil);
+ return Qnil;
+}
+
+
+/* Note that all callers of make_string_from_utf8 and build_string_from_utf8
+ below either pass only valid UTF-8 strings or use the function for
+ formatting error messages; in the latter case correctness isn't
+ critical. */
+
+/* Return a unibyte string containing the sequence of UTF-8 encoding
+ units of the UTF-8 representation of STRING. If STRING does not
+ represent a sequence of Unicode scalar values, return a string with
+ unspecified contents. */
+
+static Lisp_Object
+json_encode (Lisp_Object string)
+{
+ /* FIXME: Raise an error if STRING is not a scalar value
+ sequence. */
+ return encode_string_utf_8 (string, Qnil, false, Qt, Qt);
}
#define JSON_PARSER_INTERNAL_OBJECT_WORKSPACE_SIZE 64
@@ -1894,7 +1917,6 @@ syms_of_json (void)
DEFSYM (QCnull, ":null");
DEFSYM (QCfalse, ":false");
- DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p");
DEFSYM (Qjson_value_p, "json-value-p");
DEFSYM (Qjson_error, "json-error");
@@ -1907,7 +1929,6 @@ syms_of_json (void)
DEFSYM (Qjson_invalid_surrogate_error, "json-invalid-surrogate-error")
DEFSYM (Qjson_number_out_of_range, "json-number-out-of-range-error")
DEFSYM (Qjson_escape_sequence_error, "json-escape-sequence-error")
- DEFSYM (Qjson_unavailable, "json-unavailable");
define_error (Qjson_error, "generic JSON error", Qerror);
define_error (Qjson_out_of_memory,
"not enough memory for creating JSON object", Qjson_error);
diff --git a/src/lisp.h b/src/lisp.h
index f066c876619..7c4bd435cd8 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4327,7 +4327,6 @@ verify (FLT_RADIX == 2 || FLT_RADIX == 16);
#ifdef HAVE_JSON
/* Defined in json.c. */
-extern void init_json (void);
extern void syms_of_json (void);
#endif
diff --git a/src/print.c b/src/print.c
index 76c577ec800..0d867b89395 100644
--- a/src/print.c
+++ b/src/print.c
@@ -2859,6 +2859,7 @@ syms_of_print (void)
A value of nil means to use the shortest notation
that represents the number without losing information. */);
Vfloat_output_format = Qnil;
+ DEFSYM (Qfloat_output_format, "float-output-format");
DEFVAR_BOOL ("print-integers-as-characters", print_integers_as_characters,
doc: /* Non-nil means integers are printed using characters syntax.
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index dffc6291ca1..e5cbe8bff5c 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -126,11 +126,38 @@ json-serialize/object
(ert-deftest json-serialize/object-with-duplicate-keys ()
(skip-unless (fboundp 'json-serialize))
- (let ((table (make-hash-table :test #'eq)))
- (puthash (copy-sequence "abc") [1 2 t] table)
- (puthash (copy-sequence "abc") :null table)
- (should (equal (hash-table-count table) 2))
- (should-error (json-serialize table) :type 'wrong-type-argument)))
+
+ (dolist (n '(1 5 20 100))
+ (let ((symbols (mapcar (lambda (i) (make-symbol (format "s%d" i)))
+ (number-sequence 1 n)))
+ (expected (concat "{"
+ (mapconcat (lambda (i) (format "\"s%d\":%d" i i))
+ (number-sequence 1 n) ",")
+ "}")))
+ ;; alist
+ (should (equal (json-serialize
+ (append
+ (cl-mapcar #'cons
+ symbols (number-sequence 1 n))
+ (cl-mapcar #'cons
+ symbols (number-sequence 1001 (+ 1000 n)))))
+ expected))
+ ;; plist
+ (should (equal (json-serialize
+ (append
+ (cl-mapcan #'list
+ symbols (number-sequence 1 n))
+ (cl-mapcan #'list
+ symbols (number-sequence 1001 (+ 1000 n)))))
+ expected))))
+
+ ;; We don't check for duplicated keys in hash tables.
+ ;; (let ((table (make-hash-table :test #'eq)))
+ ;; (puthash (copy-sequence "abc") [1 2 t] table)
+ ;; (puthash (copy-sequence "abc") :null table)
+ ;; (should (equal (hash-table-count table) 2))
+ ;; (should-error (json-serialize table) :type 'wrong-type-argument))
+ )
(ert-deftest json-parse-string/object ()
(skip-unless (fboundp 'json-parse-string))
@@ -173,8 +200,8 @@ json-serialize/string
(should (equal (json-serialize ["\nasdфыв\u001f\u007ffgh\t"])
"[\"\\nasdфыв\\u001F\u007ffgh\\t\"]"))
(should (equal (json-serialize ["a\0b"]) "[\"a\\u0000b\"]"))
- ;; FIXME: Is this the right behavior?
- (should (equal (json-serialize ["\u00C4\xC3\x84"]) "[\"\u00C4\u00C4\"]")))
+ (should-error (json-serialize ["\xC3\x84"]))
+ (should-error (json-serialize ["\u00C4\xC3\x84"])))
(ert-deftest json-serialize/invalid-unicode ()
(skip-unless (fboundp 'json-serialize))
--
2.32.0 (Apple Git-132)
^ permalink raw reply related [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 11:41 ` Mattias Engdegård
@ 2024-03-30 13:22 ` Eli Zaretskii
2024-03-30 14:22 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-30 13:22 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sat, 30 Mar 2024 12:41:31 +0100
> Cc: casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> 29 mars 2024 kl. 07.04 skrev Eli Zaretskii <eliz@gnu.org>:
>
> > OK. FTR, I'm not in favor of validation of unibyte strings, I just
> > suggest that we treat them as plain-ASCII: pass them through without
> > any validation, leaving the validation to the callers.
>
> Actually we are more or less forced to validate unibyte strings as long as the serialiser returns multibyte. Which we agree that it probably shouldn't, but I'd first like to take some time to ensure that returning unibyte won't break anything.
Yes, I was writing about the state of affairs when we change the
serializer to emit unibyte strings.
> Thank you for pushing the new JSON parser to master. I've rebased my patch and cleaned it up a bit, and it now removes all uses of Jansson from json.c. Since the change involves removing some Windows-specific code, perhaps you would like to check that it still compiles on that platform, if you have the time?
It builds and passes the tests, thanks. But I have a couple of minor
nits below.
> +static struct symset_tbl *
> +make_symset_table (int bits, struct symset_tbl *up)
> +{
> + int maxbits = min (SIZE_WIDTH - 2 - (word_size < 8 ? 2 : 3), 32);
> + if (bits > maxbits)
> + error ("out of memory"); /* Will never happen in practice. */
This should be a call to memory_full, no? Or if we must signal this
error here, at least make the error message more specific: mention
JSON or somesuch.
> +{
> + double x = XFLOAT_DATA (f);
> + if (isinf (x) || isnan (x))
> + signal_error ("not a finite number", f);
I'd prefer a more specific error message here, like
JSON does not allow Inf or NaN
Last, but not least: should we have a json-available-p function that
always returns non-nil, for better backward-compatibility? Otherwise,
code out there might decide to use json.el, which is not what we want.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 13:22 ` Eli Zaretskii
@ 2024-03-30 14:22 ` Mattias Engdegård
2024-03-30 16:14 ` Richard Copley
` (2 more replies)
0 siblings, 3 replies; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-30 14:22 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
30 mars 2024 kl. 14.22 skrev Eli Zaretskii <eliz@gnu.org>:
> This should be a call to memory_full, no? Or if we must signal this
> error here, at least make the error message more specific: mention
> JSON or somesuch.
It's academic, but memory_full is fine.
> I'd prefer a more specific error message here, like
>
> JSON does not allow Inf or NaN
Sure.
> Last, but not least: should we have a json-available-p function that
> always returns non-nil, for better backward-compatibility? Otherwise,
> code out there might decide to use json.elm which is not what we want.
Yes, keep json-available-p (that always returns t) for compatibility. We could declare it obsolete but it's not very important.
I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 14:22 ` Mattias Engdegård
@ 2024-03-30 16:14 ` Richard Copley
2024-03-30 16:40 ` Eli Zaretskii
2024-03-30 16:45 ` Andy Moreton
2024-03-30 16:37 ` Eli Zaretskii
2024-04-02 14:13 ` Mattias Engdegård
2 siblings, 2 replies; 37+ messages in thread
From: Richard Copley @ 2024-03-30 16:14 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: 70007
On 30/03/2024 14:22, Mattias Engdegård wrote:
> I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
Hi Mattias,
A clean build fails with a linker error:
CCLD temacs
/usr/bin/ld: /tmp/cc4stUid.ltrans15.ltrans.o: in function `main':
<artificial>:(.text.startup+0x39a9): undefined reference to `syms_of_json'
collect2: error: ld returned 1 exit status
make[2]: *** [Makefile:739: temacs] Error 1
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 14:22 ` Mattias Engdegård
2024-03-30 16:14 ` Richard Copley
@ 2024-03-30 16:37 ` Eli Zaretskii
2024-03-30 20:21 ` Mattias Engdegård
2024-04-02 14:13 ` Mattias Engdegård
2 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-30 16:37 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sat, 30 Mar 2024 15:22:34 +0100
> Cc: casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
Thanks. The new code failed to link due to an omission in
src/Makefile.in, and also the "clever" initialization in
json_serialize confused make-docfile (so globals.h was generated
incorrectly: it missed the DEFSYMs in syms_of_json). I think I
fixed that, but please eyeball the changes to see if I missed
something.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 16:14 ` Richard Copley
@ 2024-03-30 16:40 ` Eli Zaretskii
2024-03-30 23:29 ` Richard Copley
2024-03-30 16:45 ` Andy Moreton
1 sibling, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-03-30 16:40 UTC (permalink / raw)
To: Richard Copley; +Cc: mattias.engdegard, 70007
> Cc: 70007@debbugs.gnu.org
> Date: Sat, 30 Mar 2024 16:14:45 +0000
> From: Richard Copley <rcopley@gmail.com>
>
> On 30/03/2024 14:22, Mattias Engdegård wrote:
> > I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
>
> Hi Mattias,
> A clean build fails with a linker error:
>
> CCLD temacs
> /usr/bin/ld: /tmp/cc4stUid.ltrans15.ltrans.o: in function `main':
> <artificial>:(.text.startup+0x39a9): undefined reference to `syms_of_json'
> collect2: error: ld returned 1 exit status
> make[2]: *** [Makefile:739: temacs] Error 1
Please try again, I hope I fixed this.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 16:14 ` Richard Copley
2024-03-30 16:40 ` Eli Zaretskii
@ 2024-03-30 16:45 ` Andy Moreton
2024-03-30 20:36 ` Corwin Brust
1 sibling, 1 reply; 37+ messages in thread
From: Andy Moreton @ 2024-03-30 16:45 UTC (permalink / raw)
To: 70007
On Sat 30 Mar 2024, Richard Copley wrote:
> On 30/03/2024 14:22, Mattias Engdegård wrote:
>> I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
>
> Hi Mattias,
> A clean build fails with a linker error:
>
> CCLD temacs
> /usr/bin/ld: /tmp/cc4stUid.ltrans15.ltrans.o: in function `main':
> <artificial>:(.text.startup+0x39a9): undefined reference to `syms_of_json'
> collect2: error: ld returned 1 exit status
> make[2]: *** [Makefile:739: temacs] Error 1
Commit 1135ce461d18 ("Always enable native JSON support and remove
Jansson references") removed json.o from configure.ac, but did not add it
in Makefile.in to ensure it is always built.
Adding json.o to base_obj there appears to fix the build.
AndyM
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 16:37 ` Eli Zaretskii
@ 2024-03-30 20:21 ` Mattias Engdegård
0 siblings, 0 replies; 37+ messages in thread
From: Mattias Engdegård @ 2024-03-30 20:21 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
30 mars 2024 kl. 17.37 skrev Eli Zaretskii <eliz@gnu.org>:
> Thanks.
> The new code failed to link due to an omission in
> src/Makefile.in, and also the "clever" initialization in
json_serialize confused make-docfile (so globals.h was generated
incorrectly: it missed the DEFSYMs in syms_of_json). I think I
> fixed that, but please eyeball the changes to see if I missed
> something.
Blast, I thought I had bootstrapped and tested it but naturally used the wrong tree. Thanks for putting it right.
We should do something about make-docfile one of these days.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 16:45 ` Andy Moreton
@ 2024-03-30 20:36 ` Corwin Brust
0 siblings, 0 replies; 37+ messages in thread
From: Corwin Brust @ 2024-03-30 20:36 UTC (permalink / raw)
To: Andy Moreton; +Cc: 70007
> >> I made the changes above and installed it on master, as well as a sweeping removal of all things Jansson.
This builds okay for me with MSYS2/Mingw64; here are some Windows
binaries if anyone would like to test:
https://corwin.bru.st/emacs-30/emacs-30-000f91
Thanks!
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 16:40 ` Eli Zaretskii
@ 2024-03-30 23:29 ` Richard Copley
0 siblings, 0 replies; 37+ messages in thread
From: Richard Copley @ 2024-03-30 23:29 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: mattias.engdegard, 70007
On Sat, 30 Mar 2024 at 16:40, Eli Zaretskii <eliz@gnu.org> wrote:
> Please try again, I hope I fixed this.
Thanks, looks good.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-03-30 14:22 ` Mattias Engdegård
2024-03-30 16:14 ` Richard Copley
2024-03-30 16:37 ` Eli Zaretskii
@ 2024-04-02 14:13 ` Mattias Engdegård
2024-04-02 16:13 ` Eli Zaretskii
2 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-04-02 14:13 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
[-- Attachment #1: Type: text/plain, Size: 708 bytes --]
Looks like the new serialiser inherited a bug from the old code: `json-insert` in a unibyte buffer does not move point correctly. Example:
(with-temp-buffer
(set-buffer-multibyte nil)
(json-insert "é")
(list (buffer-string) (point)))
=> ("\"\303\251\"" 4)
The string is correct but the position should be 5, not 4.
This made me look at the Fjson_insert logic a bit. I'm probably betraying my lack of knowledge about buffer subtleties here, but since the serialiser always produces (correct) UTF-8, shouldn't it be enough to copy the bytes, don't bother with any decoding, and perform the buffer insertion ceremonies?
Proposed patch attached. (There will also be a test, of course.)
[-- Attachment #2: json-insert.diff --]
[-- Type: application/octet-stream, Size: 2025 bytes --]
diff --git a/src/json.c b/src/json.c
index c3244ad04d2..7c62d63c6fd 100644
--- a/src/json.c
+++ b/src/json.c
@@ -656,39 +656,14 @@ DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, MANY,
/* No need to keep allocation beyond this point. */
unbind_to (count, Qnil);
- ptrdiff_t inserted = 0;
+ bool ub_buffer = NILP (BVAR (current_buffer, enable_multibyte_characters));
ptrdiff_t inserted_bytes = jo.size;
+ ptrdiff_t inserted = ub_buffer ? jo.size : jo.size - jo.chars_delta;
- /* If required, decode the stuff we've read into the gap. */
- struct coding_system coding;
- /* JSON strings are UTF-8 encoded strings. */
- setup_coding_system (Qutf_8_unix, &coding);
- coding.dst_multibyte = !NILP (BVAR (current_buffer,
- enable_multibyte_characters));
- if (CODING_MAY_REQUIRE_DECODING (&coding))
- {
- /* Now we have all the new bytes at the beginning of the gap,
- but `decode_coding_gap` needs them at the end of the gap, so
- we need to move them. */
- memmove (GAP_END_ADDR - inserted_bytes, GPT_ADDR, inserted_bytes);
- decode_coding_gap (&coding, inserted_bytes);
- inserted = coding.produced_char;
- }
- else
- {
- /* Make the inserted text part of the buffer, as unibyte text. */
- eassert (NILP (BVAR (current_buffer, enable_multibyte_characters)));
- insert_from_gap_1 (inserted_bytes, inserted_bytes, false);
-
- /* The target buffer is unibyte, so we don't need to decode. */
- invalidate_buffer_caches (current_buffer,
- PT, PT + inserted_bytes);
- adjust_after_insert (PT, PT_BYTE,
- PT + inserted_bytes,
- PT_BYTE + inserted_bytes,
- inserted_bytes);
- inserted = inserted_bytes;
- }
+ insert_from_gap_1 (inserted, inserted_bytes, false);
+ invalidate_buffer_caches (current_buffer, PT, PT + inserted);
+ adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted_bytes,
+ inserted);
/* Call after-change hooks. */
signal_after_change (PT, 0, inserted);
^ permalink raw reply related [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-04-02 14:13 ` Mattias Engdegård
@ 2024-04-02 16:13 ` Eli Zaretskii
2024-04-02 17:19 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-04-02 16:13 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Tue, 2 Apr 2024 16:13:58 +0200
> Cc: casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> Looks like the new serialiser inherited a bug from the old code: `json-insert` in a unibyte buffer does not move point correctly. Example:
>
> (with-temp-buffer
> (set-buffer-multibyte nil)
> (json-insert "é")
> (list (buffer-string) (point)))
> => ("\"\303\251\"" 4)
>
> The string is correct but the position should be 5, not 4.
In a build with --enable-checking, this hits an assertion. So I think
we should add this to the test suite.
> This made me look at the Fjson_insert logic a bit. I'm probably betraying my lack of knowledge about buffer subtleties here, but since the serialiser always produces (correct) UTF-8, shouldn't it be enough to copy the bytes, don't bother with any decoding, and perform the buffer insertion ceremonies?
Yes, I think that's true.
One nit, though: if the result could be an empty string, then we
should not do anything at all, not even invalidate_buffer_caches.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-04-02 16:13 ` Eli Zaretskii
@ 2024-04-02 17:19 ` Mattias Engdegård
2024-08-24 15:33 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-04-02 17:19 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
2 apr. 2024 kl. 18.13 skrev Eli Zaretskii <eliz@gnu.org>:
>> This made me look at the Fjson_insert logic a bit. I'm probably betraying my lack of knowledge about buffer subtleties here, but since the serialiser always produces (correct) UTF-8, shouldn't it be enough to copy the bytes, don't bother with any decoding, and perform the buffer insertion ceremonies?
>
> Yes, I think that's true.
Thank you, now pushed with added tests.
> One nit, though: if the result could be an empty string, then we
> should not do anything at all, not even invalidate_buffer_caches.
I don't think json-insert can ever end up inserting the empty string (unless it signals, and then we won't get to the insertion stage). But I added an assertion for this.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-04-02 17:19 ` Mattias Engdegård
@ 2024-08-24 15:33 ` Mattias Engdegård
2024-08-24 16:14 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-08-24 15:33 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: Yuan Fu, 70007
(I'm going through some old unfinished work.)
The only major issue remaining here would be whether `json-serialize` should return a multibyte string, as it does now, or a unibyte string. I think we agree that unibyte is the correct choice in general, and since the code is new in Emacs 30 we should make a decision now because changing it in Emacs 31 will be riskier. On the other hand, the old libjansson implementation returned a multibyte string.
I've looked at packages using the API and they appear entirely agnostic -- the result is almost always a string that is fed into `process-send-string`.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-24 15:33 ` Mattias Engdegård
@ 2024-08-24 16:14 ` Eli Zaretskii
2024-08-24 19:45 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-08-24 16:14 UTC (permalink / raw)
To: Mattias Engdegård; +Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sat, 24 Aug 2024 17:33:33 +0200
> Cc: Yuan Fu <casouri@gmail.com>,
> 70007@debbugs.gnu.org
>
> (I'm going through some old unfinished work.)
>
> The only major issue remaining here would be whether `json-serialize` should return a multibyte string, as it does now, or a unibyte string. I think we agree that unibyte is the correct choice in general, and since the code is new in Emacs 30 we should make a decision now because changing it in Emacs 31 will be riskier. On the other hand, the old libjansson implementation returned a multibyte string.
>
> I've looked at packages using the API and they appear entirely agnostic -- the result is almost always a string that is fed into `process-send-string`.
We are very late in the release process to make such changes without
reservations. We should have discussed this much earlier, but that's
water under the bridge.
Since this could potentially (albeit unlikely) break someone's code,
I'm okay with making this change, but with a twist: let's have a
variable which could be let-bound around the call to json-serialize,
to make the result a multibyte string instead of the default unibyte.
This is so if someone comes back crying that we broke his/her code,
they could have an easy fire escape.
OK?
P.S. Of course, the fact that the result is a unibyte string should be
mentioned in the doc string.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-24 16:14 ` Eli Zaretskii
@ 2024-08-24 19:45 ` Mattias Engdegård
2024-08-25 5:07 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-08-24 19:45 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, 70007
24 aug. 2024 kl. 18.14 skrev Eli Zaretskii <eliz@gnu.org>:
> We are very late in the release process to make such changes without
> reservations. We should have discussed this much earlier, but that's
> water under the bridge.
Yes, sorry about not following this through in time. But here we are.
> Since this could potentially (albeit unlikely) break someone's code,
> I'm okay with making this change, but with a twist: let's have a
> variable which could be let-bound around the call to json-serialize,
> to make the result a multibyte string instead of the default unibyte.
> This is so if someone comes back crying that we broke his/her code,
> they could have an easy fire escape.
Sure, but it's a bit annoying to saddle us with some baggage that we never expect to be actually used.
And for that matter, it's no easier to bind a variable around the call than putting a call to `decode-coding-string` around it, is it? We could describe that in NEWS.
The null option is to do nothing and stick to what we have now. It's not a terrible choice either.
> P.S. Of course, the fact that the result is a unibyte string should be
> mentioned in the doc string.
Yes, definitely.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-24 19:45 ` Mattias Engdegård
@ 2024-08-25 5:07 ` Eli Zaretskii
2024-08-25 17:55 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-08-25 5:07 UTC (permalink / raw)
To: Mattias Engdegård, Stefan Monnier, Stefan Kangas,
Andrea Corallo
Cc: casouri, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sat, 24 Aug 2024 21:45:57 +0200
> Cc: casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> 24 aug. 2024 kl. 18.14 skrev Eli Zaretskii <eliz@gnu.org>:
>
> > We are very late in the release process to make such changes without
> > reservations. We should have discussed this much earlier, but that's
> > water under the bridge.
>
> Yes, sorry about not following this through in time. But here we are.
>
> > Since this could potentially (albeit unlikely) break someone's code,
> > I'm okay with making this change, but with a twist: let's have a
> > variable which could be let-bound around the call to json-serialize,
> > to make the result a multibyte string instead of the default unibyte.
> > This is so if someone comes back crying that we broke his/her code,
> > they could have an easy fire escape.
>
> Sure, but it's a bit annoying to saddle us with some baggage that we never expect to be actually used.
> And for that matter, it's no easier to bind a variable around the call than putting a call to `decode-coding-string` around it, is it? We could describe that in NEWS.
>
> The null option is to do nothing and stick to what we have now. It's not a terrible choice either.
Let's see what others think about this. Stefan, Stefan, and Andrea:
any opinions wrt what, if anything, we should do about this on the
emacs-30 branch?
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 5:07 ` Eli Zaretskii
@ 2024-08-25 17:55 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-25 18:26 ` Eli Zaretskii
2024-08-25 20:08 ` Mattias Engdegård
0 siblings, 2 replies; 37+ messages in thread
From: Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors @ 2024-08-25 17:55 UTC (permalink / raw)
To: Eli Zaretskii
Cc: Mattias Engdegård, Andrea Corallo, casouri, Stefan Kangas,
70007
>> > Since this could potentially (albeit unlikely) break someone's code,
>> > I'm okay with making this change, but with a twist: let's have a
>> > variable which could be let-bound around the call to json-serialize,
>> > to make the result a multibyte string instead of the default unibyte.
>> > This is so if someone comes back crying that we broke his/her code,
>> > they could have an easy fire escape.
>> Sure, but it's a bit annoying to saddle us with some baggage that we never
>> expect to be actually used.
>> And for that matter, it's no easier to bind a variable around the call
>> than putting a call to `decode-coding-string` around it, is it? We could
>> describe that in NEWS.
The variable solution has one advantage: users can (setq ... t) in their
`.emacs` to keep old code working, tho potentially at the cost of
breaking new code.
Another option is to introduce a new name (that outputs a unicode
string) and obsolete the old one. 🙂
But just making a breaking change is not that bad: ELisp has evolved to
be pretty good at making code magically work for both encoded and
decoded strings.
>> The null option is to do nothing and stick to what we have now.
>> It's not a terrible choice either.
Returning encoded strings is something we do want to avoid in the
long run, so we might as well bite the bullet now.
> Let's see what others think about this. Stefan, Stefan, and Andrea:
> any opinions wrt what, if anything, we should do about this on the
> emacs-30 branch?
I vote for breaking compatibility.
Stefan
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 17:55 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
@ 2024-08-25 18:26 ` Eli Zaretskii
2024-08-25 19:20 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-25 20:08 ` Mattias Engdegård
1 sibling, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-08-25 18:26 UTC (permalink / raw)
To: Stefan Monnier; +Cc: mattias.engdegard, acorallo, casouri, stefankangas, 70007
> From: Stefan Monnier <monnier@iro.umontreal.ca>
> Cc: Mattias Engdegård <mattias.engdegard@gmail.com>,
> Stefan Kangas
> <stefankangas@gmail.com>, Andrea Corallo <acorallo@gnu.org>,
> casouri@gmail.com, 70007@debbugs.gnu.org
> Date: Sun, 25 Aug 2024 13:55:53 -0400
>
> > Let's see what others think about this. Stefan, Stefan, and Andrea:
> > any opinions wrt what, if anything, we should do about this on the
> > emacs-30 branch?
>
> I vote for breaking compatibility.
And against the additional variable to make this more
backward-compatible?
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 18:26 ` Eli Zaretskii
@ 2024-08-25 19:20 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-31 22:15 ` Stefan Kangas
0 siblings, 1 reply; 37+ messages in thread
From: Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors @ 2024-08-25 19:20 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: mattias.engdegard, acorallo, casouri, stefankangas, 70007
> And against the additional variable to make this more
> backward-compatible?
Yup. The var would be my second-best choice (and I assume it's
immediately declared obsolete).
Stefan
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 17:55 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-25 18:26 ` Eli Zaretskii
@ 2024-08-25 20:08 ` Mattias Engdegård
2024-08-31 9:45 ` Eli Zaretskii
1 sibling, 1 reply; 37+ messages in thread
From: Mattias Engdegård @ 2024-08-25 20:08 UTC (permalink / raw)
To: Stefan Monnier
Cc: Eli Zaretskii, Andrea Corallo, casouri, Stefan Kangas, 70007
25 aug. 2024 kl. 19.55 skrev Stefan Monnier <monnier@iro.umontreal.ca>:
> The variable solution has one advantage: users can (setq ... t) in their
> `.emacs` to keep old code working, tho potentially at the cost of
> breaking new code.
Actually that's a terrible solution because it affects all code, not just a single package. In addition, it will linger in the user's system indefinitely (.emacs being the prime bioaccumulation tissue) and potentially cause trouble when he or she installs a perfectly working package years later.
So my preferences are:
First place: always unibyte, with no controlling variable.
Very close second place: always multibyte, idem.
Distant third place: contrived new function name for unibyte, like json-serialize-encode (no, I don't like it either).
Very distant fourth place: variable controlling the string type returned.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 20:08 ` Mattias Engdegård
@ 2024-08-31 9:45 ` Eli Zaretskii
0 siblings, 0 replies; 37+ messages in thread
From: Eli Zaretskii @ 2024-08-31 9:45 UTC (permalink / raw)
To: Stefan Kangas, Andrea Corallo
Cc: Mattias Engdegård, casouri, monnier, 70007
> From: Mattias Engdegård <mattias.engdegard@gmail.com>
> Date: Sun, 25 Aug 2024 22:08:30 +0200
> Cc: Eli Zaretskii <eliz@gnu.org>,
> Stefan Kangas <stefankangas@gmail.com>,
> Andrea Corallo <acorallo@gnu.org>,
> casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> 25 aug. 2024 kl. 19.55 skrev Stefan Monnier <monnier@iro.umontreal.ca>:
>
> > The variable solution has one advantage: users can (setq ... t) in their
> > `.emacs` to keep old code working, tho potentially at the cost of
> > breaking new code.
>
> Actually that's a terrible solution because it affects all code, not just a single package. In addition, it will linger in the user's system indefinitely (.emacs being the prime bioaccumulation tissue) and potentially cause trouble when he or she installs a perfectly working package years later.
>
> So my preferences are:
>
> First place: always unibyte, with no controlling variable.
> Very close second place: always multibyte, idem.
> Distant third place: contrived new function name for unibyte, like json-serialize-encode (no, I don't like it either).
> Very distant fourth place: variable controlling the string type returned.
Stefan Kangas and Andrea, can I have your opinions on this, please?
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-25 19:20 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
@ 2024-08-31 22:15 ` Stefan Kangas
2024-09-07 7:26 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Stefan Kangas @ 2024-08-31 22:15 UTC (permalink / raw)
To: Stefan Monnier, Eli Zaretskii; +Cc: mattias.engdegard, acorallo, casouri, 70007
Stefan Monnier <monnier@iro.umontreal.ca> writes:
>> And against the additional variable to make this more
>> backward-compatible?
>
> Yup. The var would be my second-best choice (and I assume it's
> immediately declared obsolete).
I tend to agree with Stefan M here.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-08-31 22:15 ` Stefan Kangas
@ 2024-09-07 7:26 ` Eli Zaretskii
2024-09-07 15:48 ` Andrea Corallo
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-09-07 7:26 UTC (permalink / raw)
To: acorallo, Stefan Kangas; +Cc: mattias.engdegard, casouri, monnier, 70007
> From: Stefan Kangas <stefankangas@gmail.com>
> Date: Sat, 31 Aug 2024 15:15:25 -0700
> Cc: mattias.engdegard@gmail.com, acorallo@gnu.org, casouri@gmail.com,
> 70007@debbugs.gnu.org
>
> Stefan Monnier <monnier@iro.umontreal.ca> writes:
>
> >> And against the additional variable to make this more
> >> backward-compatible?
> >
> > Yup. The var would be my second-best choice (and I assume it's
> > immediately declared obsolete).
>
> I tend to agree with Stefan M here.
Thanks.
Andrea, would you please voice your opinion on this?
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-09-07 7:26 ` Eli Zaretskii
@ 2024-09-07 15:48 ` Andrea Corallo
2024-09-07 15:52 ` Eli Zaretskii
0 siblings, 1 reply; 37+ messages in thread
From: Andrea Corallo @ 2024-09-07 15:48 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: mattias.engdegard, 70007, casouri, Stefan Kangas, monnier
Eli Zaretskii <eliz@gnu.org> writes:
>> From: Stefan Kangas <stefankangas@gmail.com>
>> Date: Sat, 31 Aug 2024 15:15:25 -0700
>> Cc: mattias.engdegard@gmail.com, acorallo@gnu.org, casouri@gmail.com,
>> 70007@debbugs.gnu.org
>>
>> Stefan Monnier <monnier@iro.umontreal.ca> writes:
>>
>> >> And against the additional variable to make this more
>> >> backward-compatible?
>> >
>> > Yup. The var would be my second-best choice (and I assume it's
>> > immediately declared obsolete).
>>
>> I tend to agree with Stefan M here.
>
> Thanks.
>
> Andrea, would you please voice your opinion on this?
I'm for returning unibyte indeed. And for the variable or the second
function I'm kind of neutral, I'd do it only if it's not too much effort
so I'll trust Mattias preference here.
Andrea
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-09-07 15:48 ` Andrea Corallo
@ 2024-09-07 15:52 ` Eli Zaretskii
2024-09-08 18:33 ` Mattias Engdegård
0 siblings, 1 reply; 37+ messages in thread
From: Eli Zaretskii @ 2024-09-07 15:52 UTC (permalink / raw)
To: Andrea Corallo; +Cc: mattias.engdegard, 70007, casouri, stefankangas, monnier
> From: Andrea Corallo <acorallo@gnu.org>
> Cc: Stefan Kangas <stefankangas@gmail.com>, monnier@iro.umontreal.ca,
> mattias.engdegard@gmail.com, casouri@gmail.com, 70007@debbugs.gnu.org
> Date: Sat, 07 Sep 2024 11:48:36 -0400
>
> Eli Zaretskii <eliz@gnu.org> writes:
>
> >> From: Stefan Kangas <stefankangas@gmail.com>
> >> Date: Sat, 31 Aug 2024 15:15:25 -0700
> >> Cc: mattias.engdegard@gmail.com, acorallo@gnu.org, casouri@gmail.com,
> >> 70007@debbugs.gnu.org
> >>
> >> Stefan Monnier <monnier@iro.umontreal.ca> writes:
> >>
> >> >> And against the additional variable to make this more
> >> >> backward-compatible?
> >> >
> >> > Yup. The var would be my second-best choice (and I assume it's
> >> > immediately declared obsolete).
> >>
> >> I tend to agree with Stefan M here.
> >
> > Thanks.
> >
> > Andrea, would you please voice your opinion on this?
>
> I'm for returning unibyte indeed. And for the variable or the second
> function I'm kind of neutral, I'd do it only if it's not too much effort
> so I'll trust Mattias preference here.
OK, so let's go with unconditionally unibyte, as it seems to be the
consensus here.
^ permalink raw reply [flat|nested] 37+ messages in thread
* bug#70007: [PATCH] native JSON encoder
2024-09-07 15:52 ` Eli Zaretskii
@ 2024-09-08 18:33 ` Mattias Engdegård
0 siblings, 0 replies; 37+ messages in thread
From: Mattias Engdegård @ 2024-09-08 18:33 UTC (permalink / raw)
To: Eli Zaretskii; +Cc: casouri, Andrea Corallo, 70007, stefankangas, monnier
7 sep. 2024 kl. 17.52 skrev Eli Zaretskii <eliz@gnu.org>:
> OK, so let's go with unconditionally unibyte, as it seems to be the
> consensus here.
That change has now been pushed to emacs-30. Feel free to adjust as required.
^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2024-09-08 18:33 UTC | newest]
Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-26 15:33 bug#70007: [PATCH] native JSON encoder Mattias Engdegård
2024-03-26 16:46 ` Eli Zaretskii
2024-03-27 12:46 ` Mattias Engdegård
2024-03-27 15:49 ` Mattias Engdegård
2024-03-27 17:40 ` Eli Zaretskii
2024-03-27 18:57 ` Mattias Engdegård
2024-03-27 19:05 ` Eli Zaretskii
2024-03-28 20:59 ` Mattias Engdegård
2024-03-29 6:04 ` Eli Zaretskii
2024-03-30 11:41 ` Mattias Engdegård
2024-03-30 13:22 ` Eli Zaretskii
2024-03-30 14:22 ` Mattias Engdegård
2024-03-30 16:14 ` Richard Copley
2024-03-30 16:40 ` Eli Zaretskii
2024-03-30 23:29 ` Richard Copley
2024-03-30 16:45 ` Andy Moreton
2024-03-30 20:36 ` Corwin Brust
2024-03-30 16:37 ` Eli Zaretskii
2024-03-30 20:21 ` Mattias Engdegård
2024-04-02 14:13 ` Mattias Engdegård
2024-04-02 16:13 ` Eli Zaretskii
2024-04-02 17:19 ` Mattias Engdegård
2024-08-24 15:33 ` Mattias Engdegård
2024-08-24 16:14 ` Eli Zaretskii
2024-08-24 19:45 ` Mattias Engdegård
2024-08-25 5:07 ` Eli Zaretskii
2024-08-25 17:55 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-25 18:26 ` Eli Zaretskii
2024-08-25 19:20 ` Stefan Monnier via Bug reports for GNU Emacs, the Swiss army knife of text editors
2024-08-31 22:15 ` Stefan Kangas
2024-09-07 7:26 ` Eli Zaretskii
2024-09-07 15:48 ` Andrea Corallo
2024-09-07 15:52 ` Eli Zaretskii
2024-09-08 18:33 ` Mattias Engdegård
2024-08-25 20:08 ` Mattias Engdegård
2024-08-31 9:45 ` Eli Zaretskii
2024-03-28 19:16 ` Theodor Thornhill via Bug reports for GNU Emacs, the Swiss army knife of text editors
Code repositories for project(s) associated with this external index
https://git.savannah.gnu.org/cgit/emacs.git
https://git.savannah.gnu.org/cgit/emacs/org-mode.git
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.