diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index c87a2998724..4cadfc79913 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a) [1,2,3] SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1; JSON_ARRAYAGG(DISTINCT b) -["Hello","World","This","Will","Work","!",null] +[null,"!","Hello","This","Will","Work","World"] SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1; JSON_ARRAYAGG(DISTINCT a LIMIT 2) [1,2] SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1; JSON_ARRAYAGG(DISTINCT b LIMIT 2) -["Hello","World"] +[null,"!"] # # JSON aggregation # @@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1))); json_object('x', json_arrayagg(json_object('a', 1))) {"x": [{"a": 1}]} # +# MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results +# +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +SELECT JSON_ARRAYAGG(a) FROM t1; +JSON_ARRAYAGG(a) +[1,2,3,1,2,3] +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[1,2,3] +INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL); +SELECT JSON_ARRAYAGG(a) FROM t1; +JSON_ARRAYAGG(a) +[1,2,3,1,2,3,null,null] +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[null,1,2,3] +DROP TABLE t1; +CREATE TABLE t1(a VARCHAR(10), b INT); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +SELECT JSON_ARRAYAGG(a) FROM t1; +JSON_ARRAYAGG(a) +["1","2","3","1","2","3"] +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +["1","2","3"] +INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL); +SELECT JSON_ARRAYAGG(a) FROM t1; +JSON_ARRAYAGG(a) +["1","2","3","1","2","3",null,null] +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +JSON_ARRAYAGG(DISTINCT a) +[null,"1","2","3"] +DROP TABLE t1; +# # End of 10.5 tests # diff --git a/mysql-test/main/func_json.test b/mysql-test/main/func_json.test index d22b41027b4..ed67df9aab8 100644 --- a/mysql-test/main/func_json.test +++ b/mysql-test/main/func_json.test @@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t; select json_object('x', json_arrayagg(json_object('a', 1))); +--echo # +--echo # MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results +--echo # + +CREATE TABLE t1(a INT, b INT); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); + +SELECT JSON_ARRAYAGG(a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; + +INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL); + +SELECT JSON_ARRAYAGG(a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1(a VARCHAR(10), b INT); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); +INSERT INTO t1 VALUES (1,1), (2,2), (3,3); + +SELECT JSON_ARRAYAGG(a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; + +INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL); + +SELECT JSON_ARRAYAGG(a) FROM t1; +SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1; + +DROP TABLE t1; + --echo # --echo # End of 10.5 tests --echo # diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 442e9b5bdca..7e2dc82cf1f 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str, const char *t_f; int t_f_len; - if (f->is_null(offset)) + if (f->is_null_in_record(key)) goto append_null; if (v_int) @@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str, } { String *sv= f->val_str(tmp_val, key + offset); - if (f->is_null(offset)) + if (f->is_null_in_record(key)) goto append_null; if (i->is_json_type()) return str->append(sv->ptr(), sv->length()); diff --git a/sql/item_sum.cc b/sql/item_sum.cc index d9c971b9919..0247356ce6b 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -3520,7 +3520,7 @@ String *Item_sum_udf_str::val_str(String *str) */ extern "C" -int group_concat_key_cmp_with_distinct(void* arg, const void* key1, +int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2) { Item_func_group_concat *item_func= (Item_func_group_concat*)arg; @@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1, } +/* + @brief + Comparator function for DISTINCT clause taking into account NULL values. + + @note + Used for JSON_ARRAYAGG function +*/ + +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1_arg, + const void* key2_arg) +{ + Item_func_group_concat *item_func= (Item_func_group_concat*)arg; + + uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes; + uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes; + + /* + JSON_ARRAYAGG function only accepts one argument. + */ + + Item *item= item_func->args[0]; + /* + If item is a const item then either get_tmp_table_field returns 0 + or it is an item over a const table. + */ + if (item->const_item()) + return 0; + /* + We have to use get_tmp_table_field() instead of + real_item()->get_tmp_table_field() because we want the field in + the temporary table, not the original field + */ + Field *field= item->get_tmp_table_field(); + + if (!field) + return 0; + + if (field->is_null_in_record((uchar*)key1_arg) && + field->is_null_in_record((uchar*)key2_arg)) + return 0; + + if (field->is_null_in_record((uchar*)key1_arg)) + return -1; + + if (field->is_null_in_record((uchar*)key2_arg)) + return 1; + + uint offset= (field->offset(field->table->record[0]) - + field->table->s->null_bytes); + int res= field->cmp(key1 + offset, key2 + offset); + if (res) + return res; + return 0; +} + + /** function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... ) */ @@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)), uint offset= (field->offset(field->table->record[0]) - table->s->null_bytes); DBUG_ASSERT(offset < table->s->reclength); - res= item->get_str_from_field(*arg, field, &tmp, key, offset); + res= item->get_str_from_field(*arg, field, &tmp, key, + offset + item->get_null_bytes()); } else res= item->get_str_from_item(*arg, &tmp); @@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls) if (tree && (res= field->val_str(&buf))) row_str_len+= res->length(); } + else + { + /* + should not reach here, we create temp table for all the arguments of + the group_concat function + */ + DBUG_ASSERT(0); + } } null_value= FALSE; @@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls) { /* Filter out duplicate rows. */ uint count= unique_filter->elements_in_tree(); - unique_filter->unique_add(table->record[0] + table->s->null_bytes); + unique_filter->unique_add(get_record_pointer()); if (count == unique_filter->elements_in_tree()) row_eligible= FALSE; } @@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls) row to the output buffer here. That will be done in val_str. */ if (row_eligible && !warning_for_row && (!tree && !distinct)) - dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this); + dump_leaf_key(get_record_pointer(), 1, this); return 0; } @@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd) } if (distinct) - unique_filter= new Unique(group_concat_key_cmp_with_distinct, + unique_filter= new Unique(get_comparator_function_for_distinct(), (void*)this, - tree_key_length, + tree_key_length + get_null_bytes(), ram_limitation(thd)); if ((row_limit && row_limit->cmp_type() != INT_RESULT) || (offset_limit && offset_limit->cmp_type() != INT_RESULT)) @@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str) } +/* + @brief + Get the comparator function for DISTINT clause +*/ + +qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct() +{ + return skip_nulls() ? + group_concat_key_cmp_with_distinct : + group_concat_key_cmp_with_distinct_with_nulls; +} + + +/* + + @brief + Get the record pointer of the current row of the table + + @details + look at the comments for Item_func_group_concat::get_null_bytes +*/ + +uchar* Item_func_group_concat::get_record_pointer() +{ + return skip_nulls() ? + table->record[0] + table->s->null_bytes : + table->record[0]; +} + + +/* + @brief + Get the null bytes for the table if required. + + @details + This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation + where the Unique tree or the ORDER BY tree may store the null values, + in such case we also store the null bytes inside each node of the tree. + +*/ + +uint Item_func_group_concat::get_null_bytes() +{ + return skip_nulls() ? 0 : table->s->null_bytes; +} + + void Item_func_group_concat::print(String *str, enum_query_type query_type) { str->append(func_name()); diff --git a/sql/item_sum.h b/sql/item_sum.h index b1dff9acd4a..cbf74c48826 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -1849,6 +1849,8 @@ public: C_MODE_START int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2); +int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1, + const void* key2); int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); int dump_leaf_key(void* key_arg, @@ -1913,6 +1915,9 @@ protected: friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1, const void* key2); + friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg, + const void* key1, + const void* key2); friend int group_concat_key_cmp_with_order(void* arg, const void* key1, const void* key2); friend int dump_leaf_key(void* key_arg, @@ -2004,6 +2009,10 @@ public: { context= (Name_resolution_context *)cntx; return FALSE; } Item *get_copy(THD *thd) { return get_item_copy(thd, this); } + qsort_cmp2 get_comparator_function_for_distinct(); + uchar* get_record_pointer(); + uint get_null_bytes(); + }; #endif /* ITEM_SUM_INCLUDED */