diff --git a/mysql-test/main/mdev_32854.result b/mysql-test/main/mdev_32854.result index 39ae825e8d1..389f18c1410 100644 --- a/mysql-test/main/mdev_32854.result +++ b/mysql-test/main/mdev_32854.result @@ -452,3 +452,14 @@ SELECT JSON_VALUE(@json_doc_arr_lev45_valid, '$[1][1][1][1][1][1][1][1][1][1][1] JSON_VALUE(@json_doc_arr_lev45_valid, '$[1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1]') 45 DROP TABLE t1 ,t2; +# +# MDEV-37082: Sig 11 in json_normalize_sort +# +CREATE TABLE t (a JSON)Engine=InnoDB; +INSERT INTO t VALUES (CONCAT (REPEAT('{"v":',5000),'1',REPEAT('}',5000))); +INSERT INTO t VALUES (CONCAT (REPEAT('[',5000),'1',REPEAT(']',5000))); +SELECT JSON_EQUALS (a,a) FROM t; +JSON_EQUALS (a,a) +1 +1 +DROP TABLE t; diff --git a/mysql-test/main/mdev_32854.test b/mysql-test/main/mdev_32854.test index 4332736f448..e68f3af8ff3 100644 --- a/mysql-test/main/mdev_32854.test +++ b/mysql-test/main/mdev_32854.test @@ -90,7 +90,6 @@ SELECT JSON_SCHEMA_VALID(@json_schema, @json_doc_obj_lev45_valid) AS found_path; SELECT JSON_SCHEMA_VALID(@json_schema, @json_doc_obj_lev45_valid2); - SELECT JSON_SEARCH(@json_doc_obj_lev45_valid,'one', 'This is level 45'); SET @json_updated = JSON_SET(@json_doc_obj_lev45_valid, @@ -111,3 +110,18 @@ SELECT JSON_VALUE(@json_doc_obj_lev45_valid, '$.level1.level2.level3.level4.leve SELECT JSON_VALUE(@json_doc_arr_lev45_valid, '$[1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1][1]'); DROP TABLE t1 ,t2; + +--echo # +--echo # MDEV-37082: Sig 11 in json_normalize_sort +--echo # + +--source include/have_innodb.inc + +CREATE TABLE t (a JSON)Engine=InnoDB; + +INSERT INTO t VALUES (CONCAT (REPEAT('{"v":',5000),'1',REPEAT('}',5000))); +INSERT INTO t VALUES (CONCAT (REPEAT('[',5000),'1',REPEAT(']',5000))); + +SELECT JSON_EQUALS (a,a) FROM t; + +DROP TABLE t; diff --git a/strings/json_normalize.c b/strings/json_normalize.c index 5e2e9b43e86..81575f07069 100644 --- a/strings/json_normalize.c +++ b/strings/json_normalize.c @@ -24,6 +24,35 @@ #define JSON_MALLOC_FLAGS MYF(MY_THREAD_SPECIFIC|MY_WME) #endif + +enum json_norm_visit_state { + UNPROCESSED, + CLOSE_NON_SCALAR, + WRITE_KEY, + WRITE_ITEM_SEPARATOR +}; + +/* + The json_norm_frame is used to simulate recurssion using iterative fashion. + + -> val: Which json node we are processing. + -> index: Which child we are on. + For object, eg: {"a":1, "b":2 } then a is child 0 and b is child 1. + For array, eg: [1, 2, 3] 1 is child 0, 2 is child 1, 3 is child 2. + -> visited: Which phase of processing we are at. Mainly used for printing. + UNPROCESSED: First time we saw this node. If it is an object or array, + set to 1 and push the frame in the stack. + CLOSE_NON_SCALAR: You are seeing the node after processing the children + WRITE_KEY: print object key for index + WRITE_ITEM_SEPARATOR: print array comma for index +*/ +struct json_norm_frame +{ + struct json_norm_value *val; + size_t index; + enum json_norm_visit_state visited; +}; + /* From the EXPIRED DRAFT JSON Canonical Form https://datatracker.ietf.org/doc/html/draft-staykov-hu-json-canonical-form-00 @@ -267,8 +296,8 @@ json_norm_array_append_value(struct json_norm_array *arr, int json_norm_init_dynamic_array(size_t element_size, void *where) { - const size_t init_alloc= 20; - const size_t alloc_increment= 20; + const size_t init_alloc= JSON_DEPTH_INC; + const size_t alloc_increment= JSON_DEPTH_INC; return my_init_dynamic_array(PSI_JSON, where, element_size, init_alloc, alloc_increment, JSON_MALLOC_FLAGS); @@ -317,192 +346,370 @@ static int json_norm_kv_comp(const void *a_, const void *b_) } -static void -json_normalize_sort(struct json_norm_value *val) -{ - switch (val->type) { - case JSON_VALUE_OBJECT: - { - size_t i; - DYNAMIC_ARRAY *pairs= &val->value.object.kv_pairs; - for (i= 0; i < pairs->elements; ++i) - { - struct json_norm_kv *kv= dynamic_element(pairs, i, struct json_norm_kv*); - json_normalize_sort(&kv->value); - } - - my_qsort(dynamic_element(pairs, 0, struct json_norm_kv*), - pairs->elements, sizeof(struct json_norm_kv), json_norm_kv_comp); - break; - } - case JSON_VALUE_ARRAY: - { - /* Arrays in JSON must keep the order. Just recursively sort values. */ - size_t i; - DYNAMIC_ARRAY *values= &val->value.array.values; - for (i= 0; i < values->elements; ++i) - { - struct json_norm_value *value; - value= dynamic_element(values, i, struct json_norm_value*); - json_normalize_sort(value); - } - - break; - } - case JSON_VALUE_UNINITIALIZED: - DBUG_ASSERT(0); - break; - default: /* Nothing to do for other types. */ - break; - } -} - - -static void -json_norm_value_free(struct json_norm_value *val) +/* + The function is an iterative DFS, walks the entire json and + sorts the key-value iteratively. Arrays are only traversed, only + objects are sorted. +*/ +static void json_normalize_sort(struct json_norm_value *root) { + DYNAMIC_ARRAY stack; + struct json_norm_frame frame, child; + struct json_norm_value *val; size_t i; - switch (val->type) { - case JSON_VALUE_OBJECT: - { - struct json_norm_object *obj= &val->value.object; - DYNAMIC_ARRAY *pairs_arr= &obj->kv_pairs; - for (i= 0; i < pairs_arr->elements; ++i) - { - struct json_norm_kv *kv; - kv= dynamic_element(pairs_arr, i, struct json_norm_kv *); - json_norm_string_free(&kv->key); - json_norm_value_free(&kv->value); - } - delete_dynamic(pairs_arr); - break; - } - case JSON_VALUE_ARRAY: - { - struct json_norm_array *arr= &val->value.array; + if (json_norm_init_dynamic_array(sizeof(struct json_norm_frame), &stack)) + return; - DYNAMIC_ARRAY *values_arr= &arr->values; - for (i= 0; i < arr->values.elements; ++i) - { - struct json_norm_value *jt_value; - jt_value= dynamic_element(values_arr, i, struct json_norm_value *); - json_norm_value_free(jt_value); - } - delete_dynamic(values_arr); - break; - } - case JSON_VALUE_STRING: + frame.val= root; + frame.index= 0; + frame.visited= UNPROCESSED; + push_dynamic(&stack, &frame); + + do { - json_norm_string_free(&val->value.string); - break; - } - case JSON_VALUE_NUMBER: - json_norm_number_free(&val->value.number); - break; - case JSON_VALUE_NULL: - case JSON_VALUE_TRUE: - case JSON_VALUE_FALSE: - case JSON_VALUE_UNINITIALIZED: - break; - } - val->type= JSON_VALUE_UNINITIALIZED; + frame= *(struct json_norm_frame *)pop_dynamic(&stack); + val= frame.val; + + if (!val) + continue; + + if (frame.visited > UNPROCESSED) + { + if (val->type == JSON_VALUE_OBJECT) + { + my_qsort(dynamic_element(&val->value.object.kv_pairs, 0, + struct json_norm_kv*), val->value.object.kv_pairs.elements, + sizeof(struct json_norm_kv), json_norm_kv_comp); + } + continue; + } + + frame.visited= CLOSE_NON_SCALAR; + push_dynamic(&stack, &frame); + + switch (val->type) + { + case JSON_VALUE_OBJECT: + { + DYNAMIC_ARRAY *pairs= &val->value.object.kv_pairs; + for (i= pairs->elements; i > 0; i--) + { + struct json_norm_kv *kv= + dynamic_element(pairs, i-1, struct json_norm_kv*); + child.val= &kv->value; + child.index= 0; + child.visited= UNPROCESSED; + push_dynamic(&stack, &child); + } + break; + } + + case JSON_VALUE_ARRAY: + { + DYNAMIC_ARRAY *values= &val->value.array.values; + for (i= values->elements; i > 0; i--) + { + struct json_norm_value *child_val= + dynamic_element(values, i-1, struct json_norm_value*); + child.val= child_val; + child.index= 0; + child.visited= UNPROCESSED; + push_dynamic(&stack, &child); + } + break; + } + + default: + break; + } + } while (stack.elements); + + delete_dynamic(&stack); + + return; } -static int -json_norm_to_string(DYNAMIC_STRING *buf, struct json_norm_value *val) +/* + Free the entire JSON tree iteratively. +*/ +static void json_norm_value_free(struct json_norm_value *root) { - switch (val->type) - { - case JSON_VALUE_OBJECT: - { - size_t i; - struct json_norm_object *obj= &val->value.object; - DYNAMIC_ARRAY *pairs_arr= &obj->kv_pairs; + DYNAMIC_ARRAY stack; + struct json_norm_frame frame; + struct json_norm_value *val; + size_t i; - if (dynstr_append_mem(buf, STRING_WITH_LEN("{"))) - return 1; + if (json_norm_init_dynamic_array(sizeof(struct json_norm_frame), &stack)) + return; - for (i= 0; i < pairs_arr->elements; ++i) + frame.val= root; + frame.index= 0; + frame.visited= UNPROCESSED; + push_dynamic(&stack, &frame); + + do + { + frame= *(struct json_norm_frame *)pop_dynamic(&stack); + val= frame.val; + + if (!val) + continue; + + if (frame.visited > UNPROCESSED) { - struct json_norm_kv *kv; - kv= dynamic_element(pairs_arr, i, struct json_norm_kv *); + switch (val->type) + { + case JSON_VALUE_OBJECT: + delete_dynamic(&val->value.object.kv_pairs); + break; + + case JSON_VALUE_ARRAY: + delete_dynamic(&val->value.array.values); + break; + + case JSON_VALUE_STRING: + json_norm_string_free(&val->value.string); + break; + + case JSON_VALUE_NUMBER: + json_norm_number_free(&val->value.number); + break; + + default: break; + } + val->type= JSON_VALUE_UNINITIALIZED; + continue; + } + + frame.visited= CLOSE_NON_SCALAR; + push_dynamic(&stack, &frame); + + switch (val->type) + { + case JSON_VALUE_OBJECT: + { + DYNAMIC_ARRAY *pairs= &val->value.object.kv_pairs; + for (i= 0; i < pairs->elements; i++) + { + struct json_norm_kv *kv= + dynamic_element(pairs, i, struct json_norm_kv*); + json_norm_string_free(&kv->key); + frame.val= &kv->value; + frame.visited= UNPROCESSED; + push_dynamic(&stack, &frame); + } + break; + } + + case JSON_VALUE_ARRAY: + { + DYNAMIC_ARRAY *values= &val->value.array.values; + for (i= 0; i < values->elements; i++) + { + struct json_norm_value *child= + dynamic_element(values, i, struct json_norm_value*); + frame.val= child; + frame.visited= UNPROCESSED; + push_dynamic(&stack, &frame); + } + break; + } + + case JSON_VALUE_STRING: + json_norm_string_free(&val->value.string); + break; + + case JSON_VALUE_NUMBER: + json_norm_number_free(&val->value.number); + break; + + default: break; + } + } while (stack.elements); + + delete_dynamic(&stack); + + return; +} + + +/* + We use "visited" to keep track of where we are while appending the JSON, + since we are doing this without real recursion. + + 0: Seeing this first time. If it's an object or array, we will append the + opening bracket as needed and then push its children. + 1: We are done appending all children. Now we just need to append the + closing bracket. + 2: Only for objects. It means we still have to append the key and ':' + before appending the child value. + 3: Only for arrays. We might need to append a ',' before the next element. + + This lets us simulate recursive JSON appending using our own stack. + + return values: + 1: failure + 0: success +*/ +static int json_norm_to_string(DYNAMIC_STRING *buf, + struct json_norm_value *root) +{ + DYNAMIC_ARRAY stack; + struct json_norm_frame frame, child_frame; + struct json_norm_value *val; + size_t i; + + if (json_norm_init_dynamic_array(sizeof(struct json_norm_frame), &stack)) + return 1; + + frame.val= root; + frame.index= 0; + frame.visited= UNPROCESSED; + + push_dynamic(&stack, &frame); + + do + { + frame= *(struct json_norm_frame *)pop_dynamic(&stack); + val= frame.val; + + if (!val) + continue; + + if (frame.visited == CLOSE_NON_SCALAR) + { + if (val->type == JSON_VALUE_OBJECT) + { + if (dynstr_append_mem(buf, STRING_WITH_LEN("}"))) + goto error; + } + else if (val->type == JSON_VALUE_ARRAY) + { + if (dynstr_append_mem(buf, STRING_WITH_LEN("]"))) + goto error; + } + continue; + } + else if (frame.visited == WRITE_KEY) + { + struct json_norm_object *obj= &val->value.object; + struct json_norm_kv *kv= + dynamic_element(&obj->kv_pairs, frame.index, struct json_norm_kv*); + + if (frame.index > 0 && dynstr_append_mem(buf, STRING_WITH_LEN(","))) + goto error; if (dynstr_append_mem(buf, STRING_WITH_LEN("\"")) || dynstr_append(buf, kv->key.str) || - dynstr_append_mem(buf, STRING_WITH_LEN("\":")) || - json_norm_to_string(buf, &kv->value)) - return 1; + dynstr_append_mem(buf, STRING_WITH_LEN("\":"))) + goto error; + continue; - if (i != (pairs_arr->elements - 1)) - if (dynstr_append_mem(buf, STRING_WITH_LEN(","))) - return 1; } - if (dynstr_append_mem(buf, STRING_WITH_LEN("}"))) - return 1; - break; - } - case JSON_VALUE_ARRAY: - { - size_t i; - struct json_norm_array *arr= &val->value.array; - DYNAMIC_ARRAY *values_arr= &arr->values; - - if (dynstr_append_mem(buf, STRING_WITH_LEN("["))) - return 1; - for (i= 0; i < values_arr->elements; ++i) + else if (frame.visited == WRITE_ITEM_SEPARATOR) { - struct json_norm_value *jt_value; - jt_value= dynamic_element(values_arr, i, struct json_norm_value *); - - if (json_norm_to_string(buf, jt_value)) - return 1; - if (i != (values_arr->elements - 1)) - if (dynstr_append_mem(buf, STRING_WITH_LEN(","))) - return 1; + if (frame.index > 0 && dynstr_append_mem(buf, STRING_WITH_LEN(","))) + goto error; + continue; } - if (dynstr_append_mem(buf, STRING_WITH_LEN("]"))) - return 1; - break; - } - case JSON_VALUE_STRING: - { - if (dynstr_append(buf, val->value.string.str)) - return 1; - break; - } - case JSON_VALUE_NULL: - { - if (dynstr_append_mem(buf, STRING_WITH_LEN("null"))) - return 1; - break; - } - case JSON_VALUE_TRUE: - { - if (dynstr_append_mem(buf, STRING_WITH_LEN("true"))) - return 1; - break; - } - case JSON_VALUE_FALSE: - { - if (dynstr_append_mem(buf, STRING_WITH_LEN("false"))) - return 1; - break; - } - case JSON_VALUE_NUMBER: - { - if (dynstr_append(buf, val->value.number.str)) - return 1; - break; - } - case JSON_VALUE_UNINITIALIZED: - { - DBUG_ASSERT(0); - break; - } - } + + if (val->type != JSON_VALUE_OBJECT && val->type != JSON_VALUE_ARRAY) + { + switch (val->type) + { + case JSON_VALUE_STRING: + if (dynstr_append(buf, val->value.string.str)) + goto error; + break; + + case JSON_VALUE_NUMBER: + if (dynstr_append(buf, val->value.number.str)) + goto error; + break; + + case JSON_VALUE_NULL: + if (dynstr_append_mem(buf, STRING_WITH_LEN("null"))) + goto error; + break; + + case JSON_VALUE_TRUE: + if (dynstr_append_mem(buf, STRING_WITH_LEN("true"))) + goto error; + break; + + case JSON_VALUE_FALSE: + if (dynstr_append_mem(buf, STRING_WITH_LEN("false"))) + goto error; + break; + + default: DBUG_ASSERT(0); + break; + } + continue; + } + + frame.visited= CLOSE_NON_SCALAR; + push_dynamic(&stack, &frame); + + if (val->type == JSON_VALUE_OBJECT) + { + struct json_norm_object *obj= &val->value.object; + DYNAMIC_ARRAY *pairs= &obj->kv_pairs; + + if (dynstr_append_mem(buf, STRING_WITH_LEN("{"))) + goto error; + + for (i= pairs->elements; i > 0; i--) + { + size_t current_index= i-1; + struct json_norm_kv *kv= + dynamic_element(pairs, current_index, struct json_norm_kv*); + + child_frame.val= &kv->value; + child_frame.index= 0; + child_frame.visited= UNPROCESSED; + push_dynamic(&stack, &child_frame); + + child_frame.val= val; + child_frame.index= current_index; + child_frame.visited= WRITE_KEY; + push_dynamic(&stack, &child_frame); + } + } + else + { + struct json_norm_array *arr= &val->value.array; + DYNAMIC_ARRAY *values= &arr->values; + + if (dynstr_append_mem(buf, STRING_WITH_LEN("["))) + goto error; + + for (i= values->elements; i > 0; i--) + { + size_t current_index= i-1; + struct json_norm_value *child= + dynamic_element(values, current_index, struct json_norm_value*); + + child_frame.val= child; + child_frame.index= 0; + child_frame.visited= UNPROCESSED; + push_dynamic(&stack, &child_frame); + + child_frame.val= val; + child_frame.index= current_index; + child_frame.visited= WRITE_ITEM_SEPARATOR; + push_dynamic(&stack, &child_frame); + } + } + } while (stack.elements); + + delete_dynamic(&stack); return 0; +error: + delete_dynamic(&stack); + return 1; }