MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results

For DISTINCT to be handled with JSON_ARRAYAGG, we need to make sure
that the Unique tree also holds the NULL bytes of a table record
inside the node of the tree. This behaviour for JSON_ARRAYAGG is
different from GROUP_CONCAT because in GROUP_CONCAT we just reject
NULL values for columns.

Also introduced a comparator function for the unique tree to handle null
values for distinct inside JSON_ARRAYAGG.
This commit is contained in:
Varun Gupta 2020-06-09 13:15:14 +05:30
parent a006e88cac
commit 0f6f0daa4d
5 changed files with 200 additions and 10 deletions

View file

@ -1160,13 +1160,13 @@ JSON_ARRAYAGG(DISTINCT a)
[1,2,3]
SELECT JSON_ARRAYAGG(DISTINCT b) FROM t1;
JSON_ARRAYAGG(DISTINCT b)
["Hello","World","This","Will","Work","!",null]
[null,"!","Hello","This","Will","Work","World"]
SELECT JSON_ARRAYAGG(DISTINCT a LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT a LIMIT 2)
[1,2]
SELECT JSON_ARRAYAGG(DISTINCT b LIMIT 2) FROM t1;
JSON_ARRAYAGG(DISTINCT b LIMIT 2)
["Hello","World"]
[null,"!"]
#
# JSON aggregation
#
@ -1247,5 +1247,42 @@ select json_object('x', json_arrayagg(json_object('a', 1)));
json_object('x', json_arrayagg(json_object('a', 1)))
{"x": [{"a": 1}]}
#
# MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
#
CREATE TABLE t1(a INT, b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
[1,2,3,1,2,3]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[1,2,3]
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
[1,2,3,1,2,3,null,null]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[null,1,2,3]
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(10), b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
["1","2","3","1","2","3"]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
["1","2","3"]
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
JSON_ARRAYAGG(a)
["1","2","3","1","2","3",null,null]
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
JSON_ARRAYAGG(DISTINCT a)
[null,"1","2","3"]
DROP TABLE t1;
#
# End of 10.5 tests
#

View file

@ -763,6 +763,37 @@ select json_arrayagg(a order by a asc) from (select 1 a union select 2 a) t;
select json_object('x', json_arrayagg(json_object('a', 1)));
--echo #
--echo # MDEV-22011: DISTINCT with JSON_ARRAYAGG gives wrong results
--echo #
CREATE TABLE t1(a INT, b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(10), b INT);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
INSERT INTO t1 VALUES (1,1), (2,2), (3,3);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
INSERT INTO t1 VALUES (NULL,NULL), (NULL,NULL);
SELECT JSON_ARRAYAGG(a) FROM t1;
SELECT JSON_ARRAYAGG(DISTINCT a) FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.5 tests
--echo #

View file

@ -1461,7 +1461,7 @@ static int append_json_value_from_field(String *str,
const char *t_f;
int t_f_len;
if (f->is_null(offset))
if (f->is_null_in_record(key))
goto append_null;
if (v_int)
@ -1479,7 +1479,7 @@ static int append_json_value_from_field(String *str,
}
{
String *sv= f->val_str(tmp_val, key + offset);
if (f->is_null(offset))
if (f->is_null_in_record(key))
goto append_null;
if (i->is_json_type())
return str->append(sv->ptr(), sv->length());

View file

@ -3520,7 +3520,7 @@ String *Item_sum_udf_str::val_str(String *str)
*/
extern "C"
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2)
{
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
@ -3554,6 +3554,63 @@ int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
}
/*
@brief
Comparator function for DISTINCT clause taking into account NULL values.
@note
Used for JSON_ARRAYAGG function
*/
int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1_arg,
const void* key2_arg)
{
Item_func_group_concat *item_func= (Item_func_group_concat*)arg;
uchar *key1= (uchar*)key1_arg + item_func->table->s->null_bytes;
uchar *key2= (uchar*)key2_arg + item_func->table->s->null_bytes;
/*
JSON_ARRAYAGG function only accepts one argument.
*/
Item *item= item_func->args[0];
/*
If item is a const item then either get_tmp_table_field returns 0
or it is an item over a const table.
*/
if (item->const_item())
return 0;
/*
We have to use get_tmp_table_field() instead of
real_item()->get_tmp_table_field() because we want the field in
the temporary table, not the original field
*/
Field *field= item->get_tmp_table_field();
if (!field)
return 0;
if (field->is_null_in_record((uchar*)key1_arg) &&
field->is_null_in_record((uchar*)key2_arg))
return 0;
if (field->is_null_in_record((uchar*)key1_arg))
return -1;
if (field->is_null_in_record((uchar*)key2_arg))
return 1;
uint offset= (field->offset(field->table->record[0]) -
field->table->s->null_bytes);
int res= field->cmp(key1 + offset, key2 + offset);
if (res)
return res;
return 0;
}
/**
function of sort for syntax: GROUP_CONCAT(expr,... ORDER BY col,... )
*/
@ -3672,7 +3729,8 @@ int dump_leaf_key(void* key_arg, element_count count __attribute__((unused)),
uint offset= (field->offset(field->table->record[0]) -
table->s->null_bytes);
DBUG_ASSERT(offset < table->s->reclength);
res= item->get_str_from_field(*arg, field, &tmp, key, offset);
res= item->get_str_from_field(*arg, field, &tmp, key,
offset + item->get_null_bytes());
}
else
res= item->get_str_from_item(*arg, &tmp);
@ -4012,6 +4070,14 @@ bool Item_func_group_concat::add(bool exclude_nulls)
if (tree && (res= field->val_str(&buf)))
row_str_len+= res->length();
}
else
{
/*
should not reach here, we create temp table for all the arguments of
the group_concat function
*/
DBUG_ASSERT(0);
}
}
null_value= FALSE;
@ -4021,7 +4087,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
{
/* Filter out duplicate rows. */
uint count= unique_filter->elements_in_tree();
unique_filter->unique_add(table->record[0] + table->s->null_bytes);
unique_filter->unique_add(get_record_pointer());
if (count == unique_filter->elements_in_tree())
row_eligible= FALSE;
}
@ -4047,7 +4113,7 @@ bool Item_func_group_concat::add(bool exclude_nulls)
row to the output buffer here. That will be done in val_str.
*/
if (row_eligible && !warning_for_row && (!tree && !distinct))
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
dump_leaf_key(get_record_pointer(), 1, this);
return 0;
}
@ -4247,9 +4313,9 @@ bool Item_func_group_concat::setup(THD *thd)
}
if (distinct)
unique_filter= new Unique(group_concat_key_cmp_with_distinct,
unique_filter= new Unique(get_comparator_function_for_distinct(),
(void*)this,
tree_key_length,
tree_key_length + get_null_bytes(),
ram_limitation(thd));
if ((row_limit && row_limit->cmp_type() != INT_RESULT) ||
(offset_limit && offset_limit->cmp_type() != INT_RESULT))
@ -4305,6 +4371,53 @@ String* Item_func_group_concat::val_str(String* str)
}
/*
@brief
Get the comparator function for DISTINT clause
*/
qsort_cmp2 Item_func_group_concat::get_comparator_function_for_distinct()
{
return skip_nulls() ?
group_concat_key_cmp_with_distinct :
group_concat_key_cmp_with_distinct_with_nulls;
}
/*
@brief
Get the record pointer of the current row of the table
@details
look at the comments for Item_func_group_concat::get_null_bytes
*/
uchar* Item_func_group_concat::get_record_pointer()
{
return skip_nulls() ?
table->record[0] + table->s->null_bytes :
table->record[0];
}
/*
@brief
Get the null bytes for the table if required.
@details
This function is used for GROUP_CONCAT (or JSON_ARRAYAGG) implementation
where the Unique tree or the ORDER BY tree may store the null values,
in such case we also store the null bytes inside each node of the tree.
*/
uint Item_func_group_concat::get_null_bytes()
{
return skip_nulls() ? 0 : table->s->null_bytes;
}
void Item_func_group_concat::print(String *str, enum_query_type query_type)
{
str->append(func_name());

View file

@ -1849,6 +1849,8 @@ public:
C_MODE_START
int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2);
int group_concat_key_cmp_with_distinct_with_nulls(void* arg, const void* key1,
const void* key2);
int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2);
int dump_leaf_key(void* key_arg,
@ -1913,6 +1915,9 @@ protected:
friend int group_concat_key_cmp_with_distinct(void* arg, const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_distinct_with_nulls(void* arg,
const void* key1,
const void* key2);
friend int group_concat_key_cmp_with_order(void* arg, const void* key1,
const void* key2);
friend int dump_leaf_key(void* key_arg,
@ -2004,6 +2009,10 @@ public:
{ context= (Name_resolution_context *)cntx; return FALSE; }
Item *get_copy(THD *thd)
{ return get_item_copy<Item_func_group_concat>(thd, this); }
qsort_cmp2 get_comparator_function_for_distinct();
uchar* get_record_pointer();
uint get_null_bytes();
};
#endif /* ITEM_SUM_INCLUDED */