/* JSON parsing and serialization. Copyright (C) 2017 Free Software Foundation, Inc. This file is part of GNU Emacs. GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs. If not, see . */ #include #include #include #include #include #include #include "lisp.h" #include "buffer.h" #include "coding.h" /* We install a custom allocator so that we can avoid objects larger than PTRDIFF_MAX. Such objects wouldn’t play well with the rest of Emacs’s codebase, which generally uses ptrdiff_t for sizes and indices. The other functions in this file also generally assume that size_t values never exceed PTRDIFF_MAX. */ static void * json_malloc (size_t size) { if (size > PTRDIFF_MAX) { errno = ENOMEM; return NULL; } return malloc (size); } static void json_free (void *ptr) { free (ptr); } void init_json (void) { json_set_alloc_funcs (json_malloc, json_free); } /* Return whether STRING starts with PREFIX. */ static bool json_has_prefix (const char *string, const char *prefix) { size_t string_len = strlen (string); size_t prefix_len = strlen (prefix); return string_len >= prefix_len && memcmp (string, prefix, prefix_len) == 0; } /* Return whether STRING ends with SUFFIX. */ static bool json_has_suffix (const char *string, const char *suffix) { size_t string_len = strlen (string); size_t suffix_len = strlen (suffix); return string_len >= suffix_len && memcmp (string + string_len - suffix_len, suffix, suffix_len) == 0; } /* Create a multibyte Lisp string from the UTF-8 string in [DATA, DATA + SIZE). If the range [DATA, DATA + SIZE) does not contain a valid UTF-8 string, an unspecified string is returned. */ static Lisp_Object json_make_string (const char *data, ptrdiff_t size) { return code_convert_string (make_specified_string (data, -1, size, false), Qutf_8_unix, Qt, false, true, true); } /* Create a multibyte Lisp string from the null-terminated UTF-8 string beginning at DATA. If the string is not a valid UTF-8 string, an unspecified string is returned. */ static Lisp_Object json_build_string (const char *data) { return json_make_string (data, strlen (data)); } /* Return a unibyte string containing the sequence of UTF-8 encoding units of the UTF-8 representation of STRING. If STRING does not represent a sequence of Unicode scalar values, return a string with unspecified contents. */ static Lisp_Object json_encode (Lisp_Object string) { return code_convert_string (string, Qutf_8_unix, Qt, true, true, true); } static _Noreturn void json_out_of_memory (void) { xsignal0 (Qjson_out_of_memory); } /* Signal a Lisp error corresponding to the JSON ERROR. */ static _Noreturn void json_parse_error (const json_error_t *error) { Lisp_Object symbol; /* FIXME: Upstream Jansson should have a way to return error codes without parsing the error messages. See https://github.com/akheron/jansson/issues/352. */ if (json_has_suffix (error->text, "expected near end of file")) symbol = Qjson_end_of_file; else if (json_has_prefix (error->text, "end of file expected")) symbol = Qjson_trailing_content; else symbol = Qjson_parse_error; xsignal (symbol, list5 (json_build_string (error->text), json_build_string (error->source), make_natnum (error->line), make_natnum (error->column), make_natnum (error->position))); } static void json_release_object (void *object) { json_decref (object); } /* Signal an error if OBJECT is not a string, or if OBJECT contains embedded null characters. */ static void check_string_without_embedded_nulls (Lisp_Object object) { CHECK_STRING (object); CHECK_TYPE (memchr (SDATA (object), '\0', SBYTES (object)) == NULL, Qstring_without_embedded_nulls_p, object); } /* Signal an error of type `json-out-of-memory' if OBJECT is NULL. */ static json_t * json_check (json_t *object) { if (object == NULL) json_out_of_memory (); return object; } static json_t *lisp_to_json (Lisp_Object); /* Convert a Lisp object to a toplevel JSON object (array or object). This returns Lisp_Object so we can use unbind_to. The return value is always nil. */ static _GL_ARG_NONNULL ((2)) Lisp_Object lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json) { if (VECTORP (lisp)) { ptrdiff_t size = ASIZE (lisp); *json = json_check (json_array ()); ptrdiff_t count = SPECPDL_INDEX (); record_unwind_protect_ptr (json_release_object, json); for (ptrdiff_t i = 0; i < size; ++i) { int status = json_array_append_new (*json, lisp_to_json (AREF (lisp, i))); if (status == -1) json_out_of_memory (); } eassert (json_array_size (*json) == size); clear_unwind_protect (count); return unbind_to (count, Qnil); } else if (HASH_TABLE_P (lisp)) { struct Lisp_Hash_Table *h = XHASH_TABLE (lisp); *json = json_check (json_object ()); ptrdiff_t count = SPECPDL_INDEX (); record_unwind_protect_ptr (json_release_object, *json); for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) if (!NILP (HASH_HASH (h, i))) { Lisp_Object key = json_encode (HASH_KEY (h, i)); /* We can’t specify the length, so the string must be null-terminated. */ check_string_without_embedded_nulls (key); int status = json_object_set_new (*json, SSDATA (key), lisp_to_json (HASH_VALUE (h, i))); if (status == -1) json_out_of_memory (); } clear_unwind_protect (count); return unbind_to (count, Qnil); } wrong_type_argument (Qjson_value_p, lisp); } /* Convert LISP to a toplevel JSON object (array or object). Signal an error of type `wrong-type-argument' if LISP is not a vector or hashtable. */ static json_t * lisp_to_json_toplevel (Lisp_Object lisp) { if (++lisp_eval_depth > max_lisp_eval_depth) xsignal0 (Qjson_object_too_deep); json_t *json; lisp_to_json_toplevel_1 (lisp, &json); --lisp_eval_depth; return json; } /* Convert LISP to any JSON object. Signal an error of type `wrong-type-argument' if the type of LISP can't be converted to a JSON object. */ static json_t * lisp_to_json (Lisp_Object lisp) { if (EQ (lisp, QCnull)) return json_check (json_null ()); else if (EQ (lisp, QCfalse)) return json_check (json_false ()); else if (EQ (lisp, Qt)) return json_check (json_true ()); else if (INTEGERP (lisp)) { CHECK_TYPE_RANGED_INTEGER (json_int_t, lisp); return json_check (json_integer (XINT (lisp))); } else if (FLOATP (lisp)) return json_check (json_real (XFLOAT_DATA (lisp))); else if (STRINGP (lisp)) { Lisp_Object encoded = json_encode (lisp); ptrdiff_t size = SBYTES (encoded); return json_check (json_stringn (SSDATA (encoded), size)); } /* LISP now must be a vector or hashtable. */ return lisp_to_json_toplevel (lisp); } DEFUN ("json-serialize", Fjson_serialize, Sjson_serialize, 1, 1, NULL, doc: /* Return the JSON representation of OBJECT as a string. OBJECT must be a vector or hashtable, and its elements can recursively contain `:null', `:false', t, numbers, strings, or other vectors and hashtables. `:null', `:false', and t will be converted to JSON null, false, and true values, respectively. Vectors will be converted to JSON arrays, and hashtables to JSON objects. Hashtable keys must be strings without embedded null characters and must be unique within each object. */) (Lisp_Object object) { ptrdiff_t count = SPECPDL_INDEX (); json_t *json = lisp_to_json_toplevel (object); record_unwind_protect_ptr (json_release_object, json); char *string = json_dumps (json, JSON_COMPACT); if (string == NULL) json_out_of_memory (); record_unwind_protect_ptr (free, string); return unbind_to (count, json_build_string (string)); } struct json_buffer_and_size { const char *buffer; ptrdiff_t size; }; static Lisp_Object json_insert (void *data) { struct json_buffer_and_size *buffer_and_size = data; /* FIXME: This should be possible without creating an intermediate string object. */ Lisp_Object string = json_make_string (buffer_and_size->buffer, buffer_and_size->size); insert1 (string); return Qnil; } struct json_insert_data { /* nil if json_insert succeeded, otherwise the symbol Qcatch_all_memory_full or a cons (ERROR-SYMBOL . ERROR-DATA). */ Lisp_Object error; }; /* Callback for json_dump_callback that inserts the UTF-8 string in [BUFFER, BUFFER + SIZE) into the current buffer. If [BUFFER, BUFFER + SIZE) does not contain a valid UTF-8 string, an unspecified string is inserted into the buffer. DATA must point to a structure of type json_insert_data. This function may not exit nonlocally. It catches all nonlocal exits and stores them in data->error for reraising. */ static int json_insert_callback (const char *buffer, size_t size, void *data) { struct json_insert_data *d = data; struct json_buffer_and_size buffer_and_size = {.buffer = buffer, .size = size}; d->error = internal_catch_all (json_insert, &buffer_and_size, Fidentity); return NILP (d->error) ? 0 : -1; } DEFUN ("json-insert", Fjson_insert, Sjson_insert, 1, 1, NULL, doc: /* Insert the JSON representation of OBJECT before point. This is the same as (insert (json-serialize OBJECT)), but potentially faster. See the function `json-serialize' for allowed values of OBJECT. */) (Lisp_Object object) { ptrdiff_t count = SPECPDL_INDEX (); json_t *json = lisp_to_json (object); record_unwind_protect_ptr (json_release_object, json); struct json_insert_data data; int status = json_dump_callback (json, json_insert_callback, &data, JSON_COMPACT); if (status == -1) { if (CONSP (data.error)) xsignal (XCAR (data.error), XCDR (data.error)); else json_out_of_memory (); } return unbind_to (count, Qnil); } /* Convert a JSON object to a Lisp object. */ static _GL_ARG_NONNULL ((1)) Lisp_Object json_to_lisp (json_t *json) { switch (json_typeof (json)) { case JSON_NULL: return QCnull; case JSON_FALSE: return QCfalse; case JSON_TRUE: return Qt; case JSON_INTEGER: /* Return an integer if possible, a floating-point number otherwise. This loses precision for integers with large magnitude; however, such integers tend to be nonportable anyway because many JSON implementations use only 64-bit floating-point numbers with 53 mantissa bits. See https://tools.ietf.org/html/rfc7159#section-6 for some discussion. */ return make_fixnum_or_float (json_integer_value (json)); case JSON_REAL: return make_float (json_real_value (json)); case JSON_STRING: return json_make_string (json_string_value (json), json_string_length (json)); case JSON_ARRAY: { if (++lisp_eval_depth > max_lisp_eval_depth) xsignal0 (Qjson_object_too_deep); size_t size = json_array_size (json); if (FIXNUM_OVERFLOW_P (size)) xsignal0 (Qoverflow_error); Lisp_Object result = Fmake_vector (make_natnum (size), Qunbound); for (ptrdiff_t i = 0; i < size; ++i) ASET (result, i, json_to_lisp (json_array_get (json, i))); --lisp_eval_depth; return result; } case JSON_OBJECT: { if (++lisp_eval_depth > max_lisp_eval_depth) xsignal0 (Qjson_object_too_deep); size_t size = json_object_size (json); if (FIXNUM_OVERFLOW_P (size)) xsignal0 (Qoverflow_error); Lisp_Object result = CALLN (Fmake_hash_table, QCtest, Qequal, QCsize, make_natnum (size)); struct Lisp_Hash_Table *h = XHASH_TABLE (result); const char *key_str; json_t *value; json_object_foreach (json, key_str, value) { Lisp_Object key = json_build_string (key_str); EMACS_UINT hash; ptrdiff_t i = hash_lookup (h, key, &hash); /* Keys in JSON objects are unique, so the key can’t be present yet. */ eassert (i < 0); hash_put (h, key, json_to_lisp (value), hash); } --lisp_eval_depth; return result; } } /* Can’t get here. */ emacs_abort (); } DEFUN ("json-parse-string", Fjson_parse_string, Sjson_parse_string, 1, 1, NULL, doc: /* Parse the JSON STRING into a Lisp object. This is essentially the reverse operation of `json-serialize', which see. The returned object will be a vector or hashtable. Its elements will be `:null', `:false', t, numbers, strings, or further vectors and hashtables. If there are duplicate keys in an object, all but the last one are ignored. If STRING doesn't contain a valid JSON object, an error of type `json-parse-error' is signaled. */) (Lisp_Object string) { ptrdiff_t count = SPECPDL_INDEX (); Lisp_Object encoded = json_encode (string); check_string_without_embedded_nulls (encoded); json_error_t error; json_t *object = json_loads (SSDATA (encoded), 0, &error); if (object == NULL) json_parse_error (&error); /* Avoid leaking the object in case of further errors. */ if (object != NULL) record_unwind_protect_ptr (json_release_object, object); return unbind_to (count, json_to_lisp (object)); } struct json_read_buffer_data { /* Byte position of position to read the next chunk from. */ ptrdiff_t point; }; /* Callback for json_load_callback that reads from the current buffer. DATA must point to a structure of type json_read_buffer_data. data->point must point to the byte position to read from; after reading, data->point is advanced accordingly. The buffer point itself is ignored. This function may not exit nonlocally. */ static size_t json_read_buffer_callback (void *buffer, size_t buflen, void *data) { struct json_read_buffer_data *d = data; /* First, parse from point to the gap or the end of the accessible portion, whatever is closer. */ ptrdiff_t point = d->point; ptrdiff_t end = BUFFER_CEILING_OF (point) + 1; ptrdiff_t count = end - point; if (buflen < count) count = buflen; memcpy (buffer, BYTE_POS_ADDR (point), count); d->point += count; return count; } DEFUN ("json-parse-buffer", Fjson_parse_buffer, Sjson_parse_buffer, 0, 0, NULL, doc: /* Read JSON object from current buffer starting at point. This is similar to `json-parse-string', which see. Move point after the end of the object if parsing was successful. On error, point is not moved. */) (void) { ptrdiff_t count = SPECPDL_INDEX (); ptrdiff_t point = PT_BYTE; struct json_read_buffer_data data = {.point = point}; json_error_t error; json_t *object = json_load_callback (json_read_buffer_callback, &data, JSON_DISABLE_EOF_CHECK, &error); if (object == NULL) json_parse_error (&error); /* Avoid leaking the object in case of further errors. */ record_unwind_protect_ptr (json_release_object, object); /* Convert and then move point only if everything succeeded. */ Lisp_Object lisp = json_to_lisp (object); /* Adjust point by how much we just read. */ point += error.position; SET_PT_BOTH (BYTE_TO_CHAR (point), point); return unbind_to (count, lisp); } /* Simplified version of ‘define-error’ that works with pure objects. */ static void define_error (Lisp_Object name, const char *message, Lisp_Object parent) { eassert (SYMBOLP (name)); eassert (SYMBOLP (parent)); Lisp_Object parent_conditions = Fget (parent, Qerror_conditions); eassert (CONSP (parent_conditions)); eassert (!NILP (Fmemq (parent, parent_conditions))); eassert (NILP (Fmemq (name, parent_conditions))); Fput (name, Qerror_conditions, pure_cons (name, parent_conditions)); Fput (name, Qerror_message, build_pure_c_string (message)); } void syms_of_json (void) { DEFSYM (QCnull, ":null"); DEFSYM (QCfalse, ":false"); DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p"); DEFSYM (Qjson_value_p, "json-value-p"); DEFSYM (Qutf_8_unix, "utf-8-unix"); DEFSYM (Qjson_error, "json-error"); DEFSYM (Qjson_out_of_memory, "json-out-of-memory"); DEFSYM (Qjson_parse_error, "json-parse-error"); DEFSYM (Qjson_end_of_file, "json-end-of-file"); DEFSYM (Qjson_trailing_content, "json-trailing-content"); DEFSYM (Qjson_object_too_deep, "json-object-too-deep"); define_error (Qjson_error, "generic JSON error", Qerror); define_error (Qjson_out_of_memory, "not enough memory for creating JSON object", Qjson_error); define_error (Qjson_parse_error, "could not parse JSON stream", Qjson_error); define_error (Qjson_end_of_file, "end of JSON stream", Qjson_parse_error); define_error (Qjson_trailing_content, "trailing content after JSON stream", Qjson_parse_error); define_error (Qjson_object_too_deep, "object cyclic or Lisp evaluation too deep", Qjson_error); DEFSYM (Qpure, "pure"); DEFSYM (Qside_effect_free, "side-effect-free"); DEFSYM (Qjson_serialize, "json-serialize"); DEFSYM (Qjson_parse_string, "json-parse-string"); Fput (Qjson_serialize, Qpure, Qt); Fput (Qjson_serialize, Qside_effect_free, Qt); Fput (Qjson_parse_string, Qpure, Qt); Fput (Qjson_parse_string, Qside_effect_free, Qt); defsubr (&Sjson_serialize); defsubr (&Sjson_insert); defsubr (&Sjson_parse_string); defsubr (&Sjson_parse_buffer); }