mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 18:20:07 +01:00
MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
For DISTINCT to be handled with JSON_ARRAYAGG, we need to make sure that the Unique tree also holds the NULL bytes of a table record inside the node of the tree. This behaviour for JSON_ARRAYAGG is different from GROUP_CONCAT because in GROUP_CONCAT we just reject NULL values for columns. Also introduced a comparator function for the unique tree to handle null values for distinct inside JSON_ARRAYAGG.
This commit is contained in:
parent
a006e88cac
commit
0f6f0daa4d
5 changed files with 200 additions and 10 deletions
|
@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a)
|
||||||
[1,2,3]
|
[1,2,3]
|
||||||
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
|
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
|
||||||
JSON_ARRAYAGG(DISTINCT b)
|
JSON_ARRAYAGG(DISTINCT b)
|
||||||
["Hello","World","This","Will","Work","!",null]
|
[null,"!","Hello","This","Will","Work","World"]
|
||||||
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
|
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
|
||||||
JSON_ARRAYAGG(DISTINCT a LIMIT 2)
|
JSON_ARRAYAGG(DISTINCT a LIMIT 2)
|
||||||
[1,2]
|
[1,2]
|
||||||
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
|
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
|
||||||
JSON_ARRAYAGG(DISTINCT b LIMIT 2)
|
JSON_ARRAYAGG(DISTINCT b LIMIT 2)
|
||||||
["Hello","World"]
|
[null,"!"]
|
||||||
#
|
#
|
||||||
# JSON aggregation
|
# JSON aggregation
|
||||||
#
|
#
|
||||||
|
@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1)));
|
||||||
json_object('x', json_arrayagg(json_object('a', 1)))
|
json_object('x', json_arrayagg(json_object('a', 1)))
|
||||||
{"x": [{"a": 1}]}
|
{"x": [{"a": 1}]}
|
||||||
#
|
#
|
||||||
|
# MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
|
||||||
|
#
|
||||||
|
CREATE TABLE t1(a INT, b INT);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(a)
|
||||||
|
[1,2,3,1,2,3]
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(DISTINCT a)
|
||||||
|
[1,2,3]
|
||||||
|
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(a)
|
||||||
|
[1,2,3,1,2,3,null,null]
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(DISTINCT a)
|
||||||
|
[null,1,2,3]
|
||||||
|
DROP TABLE t1;
|
||||||
|
CREATE TABLE t1(a VARCHAR(10), b INT);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(a)
|
||||||
|
["1","2","3","1","2","3"]
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(DISTINCT a)
|
||||||
|
["1","2","3"]
|
||||||
|
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(a)
|
||||||
|
["1","2","3","1","2","3",null,null]
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
JSON_ARRAYAGG(DISTINCT a)
|
||||||
|
[null,"1","2","3"]
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
# End of 10.5 tests
|
# End of 10.5 tests
|
||||||
#
|
#
|
||||||
|
|
|
@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t;
|
||||||
|
|
||||||
select json_object('x', json_arrayagg(json_object('a', 1)));
|
select json_object('x', json_arrayagg(json_object('a', 1)));
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
CREATE TABLE t1(a INT, b INT);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
|
||||||
|
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||||
|
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
CREATE TABLE t1(a VARCHAR(10), b INT);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||||
|
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
|
||||||
|
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||||
|
|
||||||
|
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||||
|
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||||
|
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.5 tests
|
--echo # End of 10.5 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
|
@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str,
|
||||||
const char *t_f;
|
const char *t_f;
|
||||||
int t_f_len;
|
int t_f_len;
|
||||||
|
|
||||||
if (f->is_null(offset))
|
if (f->is_null_in_record(key))
|
||||||
goto append_null;
|
goto append_null;
|
||||||
|
|
||||||
if (v_int)
|
if (v_int)
|
||||||
|
@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str,
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
String *sv= f->val_str(tmp_val, key + offset);
|
String *sv= f->val_str(tmp_val, key + offset);
|
||||||
if (f->is_null(offset))
|
if (f->is_null_in_record(key))
|
||||||
goto append_null;
|
goto append_null;
|
||||||
if (i->is_json_type())
|
if (i->is_json_type())
|
||||||
return str->append(sv->ptr(), sv->length());
|
return str->append(sv->ptr(), sv->length());
|
||||||
|
|
125
sql/item_sum.cc
125
sql/item_sum.cc
|
@ -3520,7 +3520,7 @@ String *Item_sum_udf_str::val_str(String *str)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||||
const void* key2)
|
const void* key2)
|
||||||
{
|
{
|
||||||
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
|
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
|
||||||
|
@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Comparator function for DISTINCT clause taking into account NULL values.
|
||||||
|
|
||||||
|
@note
|
||||||
|
Used for JSON_ARRAYAGG function
|
||||||
|
*/
|
||||||
|
|
||||||
|
int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
|
||||||
|
const void* key1_arg,
|
||||||
|
const void* key2_arg)
|
||||||
|
{
|
||||||
|
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
|
||||||
|
|
||||||
|
uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes;
|
||||||
|
uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
JSON_ARRAYAGG function only accepts one argument.
|
||||||
|
*/
|
||||||
|
|
||||||
|
Item *item= item_func->args[0];
|
||||||
|
/*
|
||||||
|
If item is a const item then either get_tmp_table_field returns 0
|
||||||
|
or it is an item over a const table.
|
||||||
|
*/
|
||||||
|
if (item->const_item())
|
||||||
|
return 0;
|
||||||
|
/*
|
||||||
|
We have to use get_tmp_table_field() instead of
|
||||||
|
real_item()->get_tmp_table_field() because we want the field in
|
||||||
|
the temporary table, not the original field
|
||||||
|
*/
|
||||||
|
Field *field= item->get_tmp_table_field();
|
||||||
|
|
||||||
|
if (!field)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (field->is_null_in_record((uchar*)key1_arg) &&
|
||||||
|
field->is_null_in_record((uchar*)key2_arg))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (field->is_null_in_record((uchar*)key1_arg))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if (field->is_null_in_record((uchar*)key2_arg))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
uint offset= (field->offset(field->table->record[0]) -
|
||||||
|
field->table->s->null_bytes);
|
||||||
|
int res= field->cmp(key1 + offset, key2 + offset);
|
||||||
|
if (res)
|
||||||
|
return res;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
|
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
|
||||||
*/
|
*/
|
||||||
|
@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
|
||||||
uint offset= (field->offset(field->table->record[0]) -
|
uint offset= (field->offset(field->table->record[0]) -
|
||||||
table->s->null_bytes);
|
table->s->null_bytes);
|
||||||
DBUG_ASSERT(offset < table->s->reclength);
|
DBUG_ASSERT(offset < table->s->reclength);
|
||||||
res= item->get_str_from_field(*arg, field, &tmp, key, offset);
|
res= item->get_str_from_field(*arg, field, &tmp, key,
|
||||||
|
offset + item->get_null_bytes());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
res= item->get_str_from_item(*arg, &tmp);
|
res= item->get_str_from_item(*arg, &tmp);
|
||||||
|
@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
||||||
if (tree && (res= field->val_str(&buf)))
|
if (tree && (res= field->val_str(&buf)))
|
||||||
row_str_len+= res->length();
|
row_str_len+= res->length();
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
should not reach here, we create temp table for all the arguments of
|
||||||
|
the group_concat function
|
||||||
|
*/
|
||||||
|
DBUG_ASSERT(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
null_value= FALSE;
|
null_value= FALSE;
|
||||||
|
@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
||||||
{
|
{
|
||||||
/* Filter out duplicate rows. */
|
/* Filter out duplicate rows. */
|
||||||
uint count= unique_filter->elements_in_tree();
|
uint count= unique_filter->elements_in_tree();
|
||||||
unique_filter->unique_add(table->record[0] + table->s->null_bytes);
|
unique_filter->unique_add(get_record_pointer());
|
||||||
if (count == unique_filter->elements_in_tree())
|
if (count == unique_filter->elements_in_tree())
|
||||||
row_eligible= FALSE;
|
row_eligible= FALSE;
|
||||||
}
|
}
|
||||||
|
@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
||||||
row to the output buffer here. That will be done in val_str.
|
row to the output buffer here. That will be done in val_str.
|
||||||
*/
|
*/
|
||||||
if (row_eligible && !warning_for_row && (!tree && !distinct))
|
if (row_eligible && !warning_for_row && (!tree && !distinct))
|
||||||
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
|
dump_leaf_key(get_record_pointer(), 1, this);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (distinct)
|
if (distinct)
|
||||||
unique_filter= new Unique(group_concat_key_cmp_with_distinct,
|
unique_filter= new Unique(get_comparator_function_for_distinct(),
|
||||||
(void*)this,
|
(void*)this,
|
||||||
tree_key_length,
|
tree_key_length + get_null_bytes(),
|
||||||
ram_limitation(thd));
|
ram_limitation(thd));
|
||||||
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
|
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
|
||||||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
|
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
|
||||||
|
@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Get the comparator function for DISTINT clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
|
||||||
|
{
|
||||||
|
return skip_nulls() ?
|
||||||
|
group_concat_key_cmp_with_distinct :
|
||||||
|
group_concat_key_cmp_with_distinct_with_nulls;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
@brief
|
||||||
|
Get the record pointer of the current row of the table
|
||||||
|
|
||||||
|
@details
|
||||||
|
look at the comments for Item_func_group_concat::get_null_bytes
|
||||||
|
*/
|
||||||
|
|
||||||
|
uchar* Item_func_group_concat::get_record_pointer()
|
||||||
|
{
|
||||||
|
return skip_nulls() ?
|
||||||
|
table->record[0] + table->s->null_bytes :
|
||||||
|
table->record[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
@brief
|
||||||
|
Get the null bytes for the table if required.
|
||||||
|
|
||||||
|
@details
|
||||||
|
This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation
|
||||||
|
where the Unique tree or the ORDER BY tree may store the null values,
|
||||||
|
in such case we also store the null bytes inside each node of the tree.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint Item_func_group_concat::get_null_bytes()
|
||||||
|
{
|
||||||
|
return skip_nulls() ? 0 : table->s->null_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Item_func_group_concat::print(String *str, enum_query_type query_type)
|
void Item_func_group_concat::print(String *str, enum_query_type query_type)
|
||||||
{
|
{
|
||||||
str->append(func_name());
|
str->append(func_name());
|
||||||
|
|
|
@ -1849,6 +1849,8 @@ public:
|
||||||
C_MODE_START
|
C_MODE_START
|
||||||
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||||
const void* key2);
|
const void* key2);
|
||||||
|
int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1,
|
||||||
|
const void* key2);
|
||||||
int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
||||||
const void* key2);
|
const void* key2);
|
||||||
int dump_leaf_key(void* key_arg,
|
int dump_leaf_key(void* key_arg,
|
||||||
|
@ -1913,6 +1915,9 @@ protected:
|
||||||
|
|
||||||
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||||
const void* key2);
|
const void* key2);
|
||||||
|
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
|
||||||
|
const void* key1,
|
||||||
|
const void* key2);
|
||||||
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
||||||
const void* key2);
|
const void* key2);
|
||||||
friend int dump_leaf_key(void* key_arg,
|
friend int dump_leaf_key(void* key_arg,
|
||||||
|
@ -2004,6 +2009,10 @@ public:
|
||||||
{ context= (Name_resolution_context *)cntx; return FALSE; }
|
{ context= (Name_resolution_context *)cntx; return FALSE; }
|
||||||
Item *get_copy(THD *thd)
|
Item *get_copy(THD *thd)
|
||||||
{ return get_item_copy<Item_func_group_concat>(thd, this); }
|
{ return get_item_copy<Item_func_group_concat>(thd, this); }
|
||||||
|
qsort_cmp2 get_comparator_function_for_distinct();
|
||||||
|
uchar* get_record_pointer();
|
||||||
|
uint get_null_bytes();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* ITEM_SUM_INCLUDED */
|
#endif /* ITEM_SUM_INCLUDED */
|
||||||
|
|
Loading…
Add table
Reference in a new issue