Bug#16539979 - BASIC SELECT COUNT(DISTINCT ID) IS BROKEN

Bug#17867117 - ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK 

Problem:
COUNT DISTINCT gives incorrect result when it uses a Unique
Tree and its last inserted record has null value.

Here is how COUNT DISTINCT is processed, given that this query is not
using loose index scan.

When a row is produced as a result of joining tables (there is only
one table here), we store the SELECTed value in a Unique tree. This
allows elimination of any duplicates, and thus implements DISTINCT.

When we have processed all rows like this, we walk the Unique tree,
counting its elements, in Aggregator_distinct::endup() (tree->walk());
for each element we call Item_sum_count::add(). Such function wants to
ignore any NULL value, for that it checks item_sum -> args[0] ->
null_value. It is a mistake: when walking the Unique tree, the value
to be aggregated is not item_sum ->args[0] but rather table ->
field[0].

Solution:
instead of item_sum -> args[0] -> null_value, use arg_is_null(), which
knows where to look (like in fix for bug 57932).

As a consequence of this solution, we have to make arg_is_null() a
little more general:
1) Because it was so far only used for AVG() (which always has a
single argument), this function was looking at a single argument; now
that it has to work with COUNT(DISTINCT expression1,expression2), it
must look at all arguments.
2) Because we start using arg_is_null () for COUNT(DISTINCT), i.e. in
Item_sum_count::add (), it implies that we are also using it for
COUNT(no DISTINCT) (same add ()). For COUNT(no DISTINCT), the
nullness to check is that of item_sum -> args[0]. But the null_value
of such item is reliable only if val_*() has been called on it. So far
arg_is_null() was always used after a call to arg_val*(), so could
rely on null_value; but for COUNT, there is no call to arg_val*(), so
arg_is_null() has to call is_null() instead.

Testcase for 16539979 by Neeraj. Testcase for 17867117 contributed by
Xiaobin Lin from Taobao.
This commit is contained in:
Guilhem Bichot 2013-12-04 12:32:42 +01:00
parent 494d0247b7
commit c90cdf5d49
4 changed files with 146 additions and 41 deletions

View file

@ -804,4 +804,44 @@ c
11112222
33334444
DROP TABLE t1;
#
# Bug#16539979 BASIC SELECT COUNT(DISTINCT ID) IS BROKEN.
# Bug#17867117 ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK
#
SET @tmp_table_size_save= @@tmp_table_size;
SET @@tmp_table_size= 1024;
CREATE TABLE t1 (a INT);
INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8);
INSERT INTO t1 SELECT a+8 FROM t1;
INSERT INTO t1 SELECT a+16 FROM t1;
INSERT INTO t1 SELECT a+32 FROM t1;
INSERT INTO t1 SELECT a+64 FROM t1;
INSERT INTO t1 VALUE(NULL);
SELECT COUNT(DISTINCT a) FROM t1;
COUNT(DISTINCT a)
128
SELECT COUNT(DISTINCT (a+0)) FROM t1;
COUNT(DISTINCT (a+0))
128
DROP TABLE t1;
create table tb(
id int auto_increment primary key,
v varchar(32))
engine=myisam charset=gbk;
insert into tb(v) values("aaa");
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
update tb set v=concat(v, id);
select count(distinct case when id<=64 then id end) from tb;
count(distinct case when id<=64 then id end)
64
select count(distinct case when id<=63 then id end) from tb;
count(distinct case when id<=63 then id end)
63
drop table tb;
SET @@tmp_table_size= @tmp_table_size_save;
End of 5.5 tests

View file

@ -625,5 +625,42 @@ INSERT INTO t1 VALUES (1111, 2222), (3333, 4444);
SELECT DISTINCT CONCAT(a,b) AS c FROM t1 ORDER BY 1;
DROP TABLE t1;
--echo #
--echo # Bug#16539979 BASIC SELECT COUNT(DISTINCT ID) IS BROKEN.
--echo # Bug#17867117 ERROR RESULT WHEN "COUNT + DISTINCT + CASE WHEN" NEED MERGE_WALK
--echo #
SET @tmp_table_size_save= @@tmp_table_size;
SET @@tmp_table_size= 1024;
CREATE TABLE t1 (a INT);
INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8);
INSERT INTO t1 SELECT a+8 FROM t1;
INSERT INTO t1 SELECT a+16 FROM t1;
INSERT INTO t1 SELECT a+32 FROM t1;
INSERT INTO t1 SELECT a+64 FROM t1;
INSERT INTO t1 VALUE(NULL);
SELECT COUNT(DISTINCT a) FROM t1;
SELECT COUNT(DISTINCT (a+0)) FROM t1;
DROP TABLE t1;
create table tb(
id int auto_increment primary key,
v varchar(32))
engine=myisam charset=gbk;
insert into tb(v) values("aaa");
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
insert into tb(v) (select v from tb);
update tb set v=concat(v, id);
select count(distinct case when id<=64 then id end) from tb;
select count(distinct case when id<=63 then id end) from tb;
drop table tb;
SET @@tmp_table_size= @tmp_table_size_save;
--echo End of 5.5 tests

View file

@ -1052,18 +1052,19 @@ void Aggregator_distinct::endup()
endup_done= TRUE;
}
}
else
{
/*
We don't have a tree only if 'setup()' hasn't been called;
this is the case of sql_select.cc:return_zero_rows.
*/
if (tree)
table->field[0]->set_notnull();
}
/*
We don't have a tree only if 'setup()' hasn't been called;
this is the case of sql_executor.cc:return_zero_rows.
*/
if (tree && !endup_done)
{
/*
All tree's values are not NULL.
Note that value of field is changed as we walk the tree, in
Aggregator_distinct::unique_walk_function, but it's always not NULL.
*/
table->field[0]->set_notnull();
/* go over the tree of distinct keys and calculate the aggregate value */
use_distinct_values= TRUE;
tree->walk(item_sum_distinct_walk, (void*) this);
@ -1334,7 +1335,7 @@ bool Item_sum_sum::add()
{
my_decimal value;
const my_decimal *val= aggr->arg_val_decimal(&value);
if (!aggr->arg_is_null())
if (!aggr->arg_is_null(true))
{
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
val, dec_buffs + curr_dec_buff);
@ -1345,7 +1346,7 @@ bool Item_sum_sum::add()
else
{
sum+= aggr->arg_val_real();
if (!aggr->arg_is_null())
if (!aggr->arg_is_null(true))
null_value= 0;
}
DBUG_RETURN(0);
@ -1455,9 +1456,27 @@ double Aggregator_simple::arg_val_real()
}
bool Aggregator_simple::arg_is_null()
bool Aggregator_simple::arg_is_null(bool use_null_value)
{
return item_sum->args[0]->null_value;
Item **item= item_sum->args;
const uint item_count= item_sum->arg_count;
if (use_null_value)
{
for (uint i= 0; i < item_count; i++)
{
if (item[i]->null_value)
return true;
}
}
else
{
for (uint i= 0; i < item_count; i++)
{
if (item[i]->maybe_null && item[i]->is_null())
return true;
}
}
return false;
}
@ -1475,10 +1494,17 @@ double Aggregator_distinct::arg_val_real()
}
bool Aggregator_distinct::arg_is_null()
bool Aggregator_distinct::arg_is_null(bool use_null_value)
{
return use_distinct_values ? table->field[0]->is_null() :
item_sum->args[0]->null_value;
if (use_distinct_values)
{
const bool rc= table->field[0]->is_null();
DBUG_ASSERT(!rc); // NULLs are never stored in 'tree'
return rc;
}
return use_null_value ?
item_sum->args[0]->null_value :
(item_sum->args[0]->maybe_null && item_sum->args[0]->is_null());
}
@ -1496,11 +1522,8 @@ void Item_sum_count::clear()
bool Item_sum_count::add()
{
for (uint i=0; i<arg_count; i++)
{
if (args[i]->maybe_null && args[i]->is_null())
return 0;
}
if (aggr->arg_is_null(false))
return 0;
count++;
return 0;
}
@ -1591,7 +1614,7 @@ bool Item_sum_avg::add()
{
if (Item_sum_sum::add())
return TRUE;
if (!aggr->arg_is_null())
if (!aggr->arg_is_null(true))
count++;
return FALSE;
}

View file

@ -58,19 +58,8 @@ protected:
/* the aggregate function class to act on */
Item_sum *item_sum;
/**
When feeding back the data in endup() from Unique/temp table back to
Item_sum::add() methods we must read the data from Unique (and not
recalculate the functions that are given as arguments to the aggregate
function.
This flag is to tell the add() methods to take the data from the Unique
instead by calling the relevant val_..() method
*/
bool use_distinct_values;
public:
Aggregator (Item_sum *arg): item_sum(arg), use_distinct_values(FALSE) {}
Aggregator (Item_sum *arg): item_sum(arg) {}
virtual ~Aggregator () {} /* Keep gcc happy */
enum Aggregator_type { SIMPLE_AGGREGATOR, DISTINCT_AGGREGATOR };
@ -107,10 +96,16 @@ public:
/** Floating point value of being-aggregated argument */
virtual double arg_val_real() = 0;
/**
NULLness of being-aggregated argument; can be called only after
arg_val_decimal() or arg_val_real().
NULLness of being-aggregated argument.
@param use_null_value Optimization: to determine if the argument is NULL
we must, in the general case, call is_null() on it, which itself might
call val_*() on it, which might be costly. If you just have called
arg_val*(), you can pass use_null_value=true; this way, arg_is_null()
might avoid is_null() and instead do a cheap read of the Item's null_value
(updated by arg_val*()).
*/
virtual bool arg_is_null() = 0;
virtual bool arg_is_null(bool use_null_value) = 0;
};
@ -480,7 +475,7 @@ public:
Item *get_arg(uint i) { return args[i]; }
Item *set_arg(uint i, THD *thd, Item *new_val);
uint get_arg_count() { return arg_count; }
uint get_arg_count() const { return arg_count; }
/* Initialization of distinct related members */
void init_aggregator()
@ -607,10 +602,20 @@ class Aggregator_distinct : public Aggregator
*/
bool always_null;
/**
When feeding back the data in endup() from Unique/temp table back to
Item_sum::add() methods we must read the data from Unique (and not
recalculate the functions that are given as arguments to the aggregate
function.
This flag is to tell the arg_*() methods to take the data from the Unique
instead of calling the relevant val_..() method.
*/
bool use_distinct_values;
public:
Aggregator_distinct (Item_sum *sum) :
Aggregator(sum), table(NULL), tmp_table_param(NULL), tree(NULL),
always_null(FALSE) {}
always_null(false), use_distinct_values(false) {}
virtual ~Aggregator_distinct ();
Aggregator_type Aggrtype() { return DISTINCT_AGGREGATOR; }
@ -620,7 +625,7 @@ public:
void endup();
virtual my_decimal *arg_val_decimal(my_decimal * value);
virtual double arg_val_real();
virtual bool arg_is_null();
virtual bool arg_is_null(bool use_null_value);
bool unique_walk_function(void *element);
static int composite_key_cmp(void* arg, uchar* key1, uchar* key2);
@ -646,7 +651,7 @@ public:
void endup() {};
virtual my_decimal *arg_val_decimal(my_decimal * value);
virtual double arg_val_real();
virtual bool arg_is_null();
virtual bool arg_is_null(bool use_null_value);
};