mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
Bug#16539979 - BASIC SELECT COUNT(DISTINCT ID) IS BROKEN
Bug#17867117 - ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK Problem: COUNT DISTINCT gives incorrect result when it uses a Unique Tree and its last inserted record has null value. Here is how COUNT DISTINCT is processed, given that this query is not using loose index scan. When a row is produced as a result of joining tables (there is only one table here), we store the SELECTed value in a Unique tree. This allows elimination of any duplicates, and thus implements DISTINCT. When we have processed all rows like this, we walk the Unique tree, counting its elements, in Aggregator_distinct::endup() (tree->walk()); for each element we call Item_sum_count::add(). Such function wants to ignore any NULL value, for that it checks item_sum -> args[0] -> null_value. It is a mistake: when walking the Unique tree, the value to be aggregated is not item_sum ->args[0] but rather table -> field[0]. Solution: instead of item_sum -> args[0] -> null_value, use arg_is_null(), which knows where to look (like in fix for bug 57932). As a consequence of this solution, we have to make arg_is_null() a little more general: 1) Because it was so far only used for AVG() (which always has a single argument), this function was looking at a single argument; now that it has to work with COUNT(DISTINCT expression1,expression2), it must look at all arguments. 2) Because we start using arg_is_null () for COUNT(DISTINCT), i.e. in Item_sum_count::add (), it implies that we are also using it for COUNT(no DISTINCT) (same add ()). For COUNT(no DISTINCT), the nullness to check is that of item_sum -> args[0]. But the null_value of such item is reliable only if val_*() has been called on it. So far arg_is_null() was always used after a call to arg_val*(), so could rely on null_value; but for COUNT, there is no call to arg_val*(), so arg_is_null() has to call is_null() instead. Testcase for 16539979 by Neeraj. Testcase for 17867117 contributed by Xiaobin Lin from Taobao.
This commit is contained in:
parent
494d0247b7
commit
c90cdf5d49
4 changed files with 146 additions and 41 deletions
|
@ -804,4 +804,44 @@ c
|
|||
11112222
|
||||
33334444
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# Bug#16539979 BASIC SELECT COUNT(DISTINCT ID) IS BROKEN.
|
||||
# Bug#17867117 ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK
|
||||
#
|
||||
SET @tmp_table_size_save= @@tmp_table_size;
|
||||
SET @@tmp_table_size= 1024;
|
||||
CREATE TABLE t1 (a INT);
|
||||
INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8);
|
||||
INSERT INTO t1 SELECT a+8 FROM t1;
|
||||
INSERT INTO t1 SELECT a+16 FROM t1;
|
||||
INSERT INTO t1 SELECT a+32 FROM t1;
|
||||
INSERT INTO t1 SELECT a+64 FROM t1;
|
||||
INSERT INTO t1 VALUE(NULL);
|
||||
SELECT COUNT(DISTINCT a) FROM t1;
|
||||
COUNT(DISTINCT a)
|
||||
128
|
||||
SELECT COUNT(DISTINCT (a+0)) FROM t1;
|
||||
COUNT(DISTINCT (a+0))
|
||||
128
|
||||
DROP TABLE t1;
|
||||
create table tb(
|
||||
id int auto_increment primary key,
|
||||
v varchar(32))
|
||||
engine=myisam charset=gbk;
|
||||
insert into tb(v) values("aaa");
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
update tb set v=concat(v, id);
|
||||
select count(distinct case when id<=64 then id end) from tb;
|
||||
count(distinct case when id<=64 then id end)
|
||||
64
|
||||
select count(distinct case when id<=63 then id end) from tb;
|
||||
count(distinct case when id<=63 then id end)
|
||||
63
|
||||
drop table tb;
|
||||
SET @@tmp_table_size= @tmp_table_size_save;
|
||||
End of 5.5 tests
|
||||
|
|
|
@ -625,5 +625,42 @@ INSERT INTO t1 VALUES (1111, 2222), (3333, 4444);
|
|||
SELECT DISTINCT CONCAT(a,b) AS c FROM t1 ORDER BY 1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # Bug#16539979 BASIC SELECT COUNT(DISTINCT ID) IS BROKEN.
|
||||
--echo # Bug#17867117 ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK
|
||||
--echo #
|
||||
|
||||
SET @tmp_table_size_save= @@tmp_table_size;
|
||||
SET @@tmp_table_size= 1024;
|
||||
|
||||
CREATE TABLE t1 (a INT);
|
||||
INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8);
|
||||
INSERT INTO t1 SELECT a+8 FROM t1;
|
||||
INSERT INTO t1 SELECT a+16 FROM t1;
|
||||
INSERT INTO t1 SELECT a+32 FROM t1;
|
||||
INSERT INTO t1 SELECT a+64 FROM t1;
|
||||
INSERT INTO t1 VALUE(NULL);
|
||||
SELECT COUNT(DISTINCT a) FROM t1;
|
||||
SELECT COUNT(DISTINCT (a+0)) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
create table tb(
|
||||
id int auto_increment primary key,
|
||||
v varchar(32))
|
||||
engine=myisam charset=gbk;
|
||||
insert into tb(v) values("aaa");
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
insert into tb(v) (select v from tb);
|
||||
|
||||
update tb set v=concat(v, id);
|
||||
select count(distinct case when id<=64 then id end) from tb;
|
||||
select count(distinct case when id<=63 then id end) from tb;
|
||||
drop table tb;
|
||||
|
||||
SET @@tmp_table_size= @tmp_table_size_save;
|
||||
|
||||
--echo End of 5.5 tests
|
||||
|
|
|
@ -1052,18 +1052,19 @@ void Aggregator_distinct::endup()
|
|||
endup_done= TRUE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
We don't have a tree only if 'setup()' hasn't been called;
|
||||
this is the case of sql_select.cc:return_zero_rows.
|
||||
*/
|
||||
if (tree)
|
||||
table->field[0]->set_notnull();
|
||||
}
|
||||
|
||||
/*
|
||||
We don't have a tree only if 'setup()' hasn't been called;
|
||||
this is the case of sql_executor.cc:return_zero_rows.
|
||||
*/
|
||||
if (tree && !endup_done)
|
||||
{
|
||||
/*
|
||||
All tree's values are not NULL.
|
||||
Note that value of field is changed as we walk the tree, in
|
||||
Aggregator_distinct::unique_walk_function, but it's always not NULL.
|
||||
*/
|
||||
table->field[0]->set_notnull();
|
||||
/* go over the tree of distinct keys and calculate the aggregate value */
|
||||
use_distinct_values= TRUE;
|
||||
tree->walk(item_sum_distinct_walk, (void*) this);
|
||||
|
@ -1334,7 +1335,7 @@ bool Item_sum_sum::add()
|
|||
{
|
||||
my_decimal value;
|
||||
const my_decimal *val= aggr->arg_val_decimal(&value);
|
||||
if (!aggr->arg_is_null())
|
||||
if (!aggr->arg_is_null(true))
|
||||
{
|
||||
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
|
||||
val, dec_buffs + curr_dec_buff);
|
||||
|
@ -1345,7 +1346,7 @@ bool Item_sum_sum::add()
|
|||
else
|
||||
{
|
||||
sum+= aggr->arg_val_real();
|
||||
if (!aggr->arg_is_null())
|
||||
if (!aggr->arg_is_null(true))
|
||||
null_value= 0;
|
||||
}
|
||||
DBUG_RETURN(0);
|
||||
|
@ -1455,9 +1456,27 @@ double Aggregator_simple::arg_val_real()
|
|||
}
|
||||
|
||||
|
||||
bool Aggregator_simple::arg_is_null()
|
||||
bool Aggregator_simple::arg_is_null(bool use_null_value)
|
||||
{
|
||||
return item_sum->args[0]->null_value;
|
||||
Item **item= item_sum->args;
|
||||
const uint item_count= item_sum->arg_count;
|
||||
if (use_null_value)
|
||||
{
|
||||
for (uint i= 0; i < item_count; i++)
|
||||
{
|
||||
if (item[i]->null_value)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (uint i= 0; i < item_count; i++)
|
||||
{
|
||||
if (item[i]->maybe_null && item[i]->is_null())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1475,10 +1494,17 @@ double Aggregator_distinct::arg_val_real()
|
|||
}
|
||||
|
||||
|
||||
bool Aggregator_distinct::arg_is_null()
|
||||
bool Aggregator_distinct::arg_is_null(bool use_null_value)
|
||||
{
|
||||
return use_distinct_values ? table->field[0]->is_null() :
|
||||
item_sum->args[0]->null_value;
|
||||
if (use_distinct_values)
|
||||
{
|
||||
const bool rc= table->field[0]->is_null();
|
||||
DBUG_ASSERT(!rc); // NULLs are never stored in 'tree'
|
||||
return rc;
|
||||
}
|
||||
return use_null_value ?
|
||||
item_sum->args[0]->null_value :
|
||||
(item_sum->args[0]->maybe_null && item_sum->args[0]->is_null());
|
||||
}
|
||||
|
||||
|
||||
|
@ -1496,11 +1522,8 @@ void Item_sum_count::clear()
|
|||
|
||||
bool Item_sum_count::add()
|
||||
{
|
||||
for (uint i=0; i<arg_count; i++)
|
||||
{
|
||||
if (args[i]->maybe_null && args[i]->is_null())
|
||||
return 0;
|
||||
}
|
||||
if (aggr->arg_is_null(false))
|
||||
return 0;
|
||||
count++;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1591,7 +1614,7 @@ bool Item_sum_avg::add()
|
|||
{
|
||||
if (Item_sum_sum::add())
|
||||
return TRUE;
|
||||
if (!aggr->arg_is_null())
|
||||
if (!aggr->arg_is_null(true))
|
||||
count++;
|
||||
return FALSE;
|
||||
}
|
||||
|
|
|
@ -58,19 +58,8 @@ protected:
|
|||
/* the aggregate function class to act on */
|
||||
Item_sum *item_sum;
|
||||
|
||||
/**
|
||||
When feeding back the data in endup() from Unique/temp table back to
|
||||
Item_sum::add() methods we must read the data from Unique (and not
|
||||
recalculate the functions that are given as arguments to the aggregate
|
||||
function.
|
||||
This flag is to tell the add() methods to take the data from the Unique
|
||||
instead by calling the relevant val_..() method
|
||||
*/
|
||||
|
||||
bool use_distinct_values;
|
||||
|
||||
public:
|
||||
Aggregator (Item_sum *arg): item_sum(arg), use_distinct_values(FALSE) {}
|
||||
Aggregator (Item_sum *arg): item_sum(arg) {}
|
||||
virtual ~Aggregator () {} /* Keep gcc happy */
|
||||
|
||||
enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR };
|
||||
|
@ -107,10 +96,16 @@ public:
|
|||
/** Floating point value of being-aggregated argument */
|
||||
virtual double arg_val_real() = 0;
|
||||
/**
|
||||
NULLness of being-aggregated argument; can be called only after
|
||||
arg_val_decimal() or arg_val_real().
|
||||
NULLness of being-aggregated argument.
|
||||
|
||||
@param use_null_value Optimization: to determine if the argument is NULL
|
||||
we must, in the general case, call is_null() on it, which itself might
|
||||
call val_*() on it, which might be costly. If you just have called
|
||||
arg_val*(), you can pass use_null_value=true; this way, arg_is_null()
|
||||
might avoid is_null() and instead do a cheap read of the Item's null_value
|
||||
(updated by arg_val*()).
|
||||
*/
|
||||
virtual bool arg_is_null() = 0;
|
||||
virtual bool arg_is_null(bool use_null_value) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
@ -480,7 +475,7 @@ public:
|
|||
|
||||
Item *get_arg(uint i) { return args[i]; }
|
||||
Item *set_arg(uint i, THD *thd, Item *new_val);
|
||||
uint get_arg_count() { return arg_count; }
|
||||
uint get_arg_count() const { return arg_count; }
|
||||
|
||||
/* Initialization of distinct related members */
|
||||
void init_aggregator()
|
||||
|
@ -607,10 +602,20 @@ class Aggregator_distinct : public Aggregator
|
|||
*/
|
||||
bool always_null;
|
||||
|
||||
/**
|
||||
When feeding back the data in endup() from Unique/temp table back to
|
||||
Item_sum::add() methods we must read the data from Unique (and not
|
||||
recalculate the functions that are given as arguments to the aggregate
|
||||
function.
|
||||
This flag is to tell the arg_*() methods to take the data from the Unique
|
||||
instead of calling the relevant val_..() method.
|
||||
*/
|
||||
bool use_distinct_values;
|
||||
|
||||
public:
|
||||
Aggregator_distinct (Item_sum *sum) :
|
||||
Aggregator(sum), table(NULL), tmp_table_param(NULL), tree(NULL),
|
||||
always_null(FALSE) {}
|
||||
always_null(false), use_distinct_values(false) {}
|
||||
virtual ~Aggregator_distinct ();
|
||||
Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; }
|
||||
|
||||
|
@ -620,7 +625,7 @@ public:
|
|||
void endup();
|
||||
virtual my_decimal *arg_val_decimal(my_decimal * value);
|
||||
virtual double arg_val_real();
|
||||
virtual bool arg_is_null();
|
||||
virtual bool arg_is_null(bool use_null_value);
|
||||
|
||||
bool unique_walk_function(void *element);
|
||||
static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
|
||||
|
@ -646,7 +651,7 @@ public:
|
|||
void endup() {};
|
||||
virtual my_decimal *arg_val_decimal(my_decimal * value);
|
||||
virtual double arg_val_real();
|
||||
virtual bool arg_is_null();
|
||||
virtual bool arg_is_null(bool use_null_value);
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue