mirror of
https://github.com/MariaDB/server.git
synced 2025-01-28 09:44:17 +01:00
MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
For DISTINCT to be handled with JSON_ARRAYAGG, we need to make sure that the Unique tree also holds the NULL bytes of a table record inside the node of the tree. This behaviour for JSON_ARRAYAGG is different from GROUP_CONCAT because in GROUP_CONCAT we just reject NULL values for columns. Also introduced a comparator function for the unique tree to handle null values for distinct inside JSON_ARRAYAGG.
This commit is contained in:
parent
a006e88cac
commit
0f6f0daa4d
5 changed files with 200 additions and 10 deletions
|
@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a)
|
|||
[1,2,3]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT b)
|
||||
["Hello","World","This","Will","Work","!",null]
|
||||
[null,"!","Hello","This","Will","Work","World"]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT a LIMIT 2)
|
||||
[1,2]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT b LIMIT 2)
|
||||
["Hello","World"]
|
||||
[null,"!"]
|
||||
#
|
||||
# JSON aggregation
|
||||
#
|
||||
|
@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1)));
|
|||
json_object('x', json_arrayagg(json_object('a', 1)))
|
||||
{"x": [{"a": 1}]}
|
||||
#
|
||||
# MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
|
||||
#
|
||||
CREATE TABLE t1(a INT, b INT);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
JSON_ARRAYAGG(a)
|
||||
[1,2,3,1,2,3]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT a)
|
||||
[1,2,3]
|
||||
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
JSON_ARRAYAGG(a)
|
||||
[1,2,3,1,2,3,null,null]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT a)
|
||||
[null,1,2,3]
|
||||
DROP TABLE t1;
|
||||
CREATE TABLE t1(a VARCHAR(10), b INT);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
JSON_ARRAYAGG(a)
|
||||
["1","2","3","1","2","3"]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT a)
|
||||
["1","2","3"]
|
||||
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
JSON_ARRAYAGG(a)
|
||||
["1","2","3","1","2","3",null,null]
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
JSON_ARRAYAGG(DISTINCT a)
|
||||
[null,"1","2","3"]
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.5 tests
|
||||
#
|
||||
|
|
|
@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t;
|
|||
|
||||
select json_object('x', json_arrayagg(json_object('a', 1)));
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1(a INT, b INT);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
|
||||
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1(a VARCHAR(10), b INT);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
|
||||
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
|
||||
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
|
||||
|
||||
SELECT JSON_ARRAYAGG(a) FROM t1;
|
||||
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.5 tests
|
||||
--echo #
|
||||
|
|
|
@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str,
|
|||
const char *t_f;
|
||||
int t_f_len;
|
||||
|
||||
if (f->is_null(offset))
|
||||
if (f->is_null_in_record(key))
|
||||
goto append_null;
|
||||
|
||||
if (v_int)
|
||||
|
@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str,
|
|||
}
|
||||
{
|
||||
String *sv= f->val_str(tmp_val, key + offset);
|
||||
if (f->is_null(offset))
|
||||
if (f->is_null_in_record(key))
|
||||
goto append_null;
|
||||
if (i->is_json_type())
|
||||
return str->append(sv->ptr(), sv->length());
|
||||
|
|
125
sql/item_sum.cc
125
sql/item_sum.cc
|
@ -3520,7 +3520,7 @@ String *Item_sum_udf_str::val_str(String *str)
|
|||
*/
|
||||
|
||||
extern "C"
|
||||
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||
const void* key2)
|
||||
{
|
||||
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
|
||||
|
@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Comparator function for DISTINCT clause taking into account NULL values.
|
||||
|
||||
@note
|
||||
Used for JSON_ARRAYAGG function
|
||||
*/
|
||||
|
||||
int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
|
||||
const void* key1_arg,
|
||||
const void* key2_arg)
|
||||
{
|
||||
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
|
||||
|
||||
uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes;
|
||||
uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes;
|
||||
|
||||
/*
|
||||
JSON_ARRAYAGG function only accepts one argument.
|
||||
*/
|
||||
|
||||
Item *item= item_func->args[0];
|
||||
/*
|
||||
If item is a const item then either get_tmp_table_field returns 0
|
||||
or it is an item over a const table.
|
||||
*/
|
||||
if (item->const_item())
|
||||
return 0;
|
||||
/*
|
||||
We have to use get_tmp_table_field() instead of
|
||||
real_item()->get_tmp_table_field() because we want the field in
|
||||
the temporary table, not the original field
|
||||
*/
|
||||
Field *field= item->get_tmp_table_field();
|
||||
|
||||
if (!field)
|
||||
return 0;
|
||||
|
||||
if (field->is_null_in_record((uchar*)key1_arg) &&
|
||||
field->is_null_in_record((uchar*)key2_arg))
|
||||
return 0;
|
||||
|
||||
if (field->is_null_in_record((uchar*)key1_arg))
|
||||
return -1;
|
||||
|
||||
if (field->is_null_in_record((uchar*)key2_arg))
|
||||
return 1;
|
||||
|
||||
uint offset= (field->offset(field->table->record[0]) -
|
||||
field->table->s->null_bytes);
|
||||
int res= field->cmp(key1 + offset, key2 + offset);
|
||||
if (res)
|
||||
return res;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
|
||||
*/
|
||||
|
@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
|
|||
uint offset= (field->offset(field->table->record[0]) -
|
||||
table->s->null_bytes);
|
||||
DBUG_ASSERT(offset < table->s->reclength);
|
||||
res= item->get_str_from_field(*arg, field, &tmp, key, offset);
|
||||
res= item->get_str_from_field(*arg, field, &tmp, key,
|
||||
offset + item->get_null_bytes());
|
||||
}
|
||||
else
|
||||
res= item->get_str_from_item(*arg, &tmp);
|
||||
|
@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
|||
if (tree && (res= field->val_str(&buf)))
|
||||
row_str_len+= res->length();
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
should not reach here, we create temp table for all the arguments of
|
||||
the group_concat function
|
||||
*/
|
||||
DBUG_ASSERT(0);
|
||||
}
|
||||
}
|
||||
|
||||
null_value= FALSE;
|
||||
|
@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
|||
{
|
||||
/* Filter out duplicate rows. */
|
||||
uint count= unique_filter->elements_in_tree();
|
||||
unique_filter->unique_add(table->record[0] + table->s->null_bytes);
|
||||
unique_filter->unique_add(get_record_pointer());
|
||||
if (count == unique_filter->elements_in_tree())
|
||||
row_eligible= FALSE;
|
||||
}
|
||||
|
@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
|
|||
row to the output buffer here. That will be done in val_str.
|
||||
*/
|
||||
if (row_eligible && !warning_for_row && (!tree && !distinct))
|
||||
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
|
||||
dump_leaf_key(get_record_pointer(), 1, this);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd)
|
|||
}
|
||||
|
||||
if (distinct)
|
||||
unique_filter= new Unique(group_concat_key_cmp_with_distinct,
|
||||
unique_filter= new Unique(get_comparator_function_for_distinct(),
|
||||
(void*)this,
|
||||
tree_key_length,
|
||||
tree_key_length + get_null_bytes(),
|
||||
ram_limitation(thd));
|
||||
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
|
||||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
|
||||
|
@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Get the comparator function for DISTINT clause
|
||||
*/
|
||||
|
||||
qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
|
||||
{
|
||||
return skip_nulls() ?
|
||||
group_concat_key_cmp_with_distinct :
|
||||
group_concat_key_cmp_with_distinct_with_nulls;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
@brief
|
||||
Get the record pointer of the current row of the table
|
||||
|
||||
@details
|
||||
look at the comments for Item_func_group_concat::get_null_bytes
|
||||
*/
|
||||
|
||||
uchar* Item_func_group_concat::get_record_pointer()
|
||||
{
|
||||
return skip_nulls() ?
|
||||
table->record[0] + table->s->null_bytes :
|
||||
table->record[0];
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@brief
|
||||
Get the null bytes for the table if required.
|
||||
|
||||
@details
|
||||
This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation
|
||||
where the Unique tree or the ORDER BY tree may store the null values,
|
||||
in such case we also store the null bytes inside each node of the tree.
|
||||
|
||||
*/
|
||||
|
||||
uint Item_func_group_concat::get_null_bytes()
|
||||
{
|
||||
return skip_nulls() ? 0 : table->s->null_bytes;
|
||||
}
|
||||
|
||||
|
||||
void Item_func_group_concat::print(String *str, enum_query_type query_type)
|
||||
{
|
||||
str->append(func_name());
|
||||
|
|
|
@ -1849,6 +1849,8 @@ public:
|
|||
C_MODE_START
|
||||
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
int dump_leaf_key(void* key_arg,
|
||||
|
@ -1913,6 +1915,9 @@ protected:
|
|||
|
||||
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
|
||||
const void* key1,
|
||||
const void* key2);
|
||||
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
|
||||
const void* key2);
|
||||
friend int dump_leaf_key(void* key_arg,
|
||||
|
@ -2004,6 +2009,10 @@ public:
|
|||
{ context= (Name_resolution_context *)cntx; return FALSE; }
|
||||
Item *get_copy(THD *thd)
|
||||
{ return get_item_copy<Item_func_group_concat>(thd, this); }
|
||||
qsort_cmp2 get_comparator_function_for_distinct();
|
||||
uchar* get_record_pointer();
|
||||
uint get_null_bytes();
|
||||
|
||||
};
|
||||
|
||||
#endif /* ITEM_SUM_INCLUDED */
|
||||
|
|
Loading…
Add table
Reference in a new issue