Merge rurik.mysql.com:/home/igor/mysql-5.0

into rurik.mysql.com:/home/igor/dev/mysql-5.0-2


sql/mysql_priv.h:
  Auto merged
sql/sql_base.cc:
  Auto merged
This commit is contained in:
unknown 2005-10-17 15:02:06 -07:00
commit 8a84dd7e7f
25 changed files with 772 additions and 75 deletions

View file

@ -163,7 +163,7 @@ show warnings;
Level Code Message
Warning 1260 1 line(s) were cut by GROUP_CONCAT()
set group_concat_max_len = 1024;
select group_concat(sum(a)) from t1 group by grp;
select group_concat(sum(c)) from t1 group by grp;
ERROR HY000: Invalid use of group function
select grp,group_concat(c order by 2) from t1 group by grp;
ERROR 42S22: Unknown column '2' in 'order clause'

View file

@ -2988,3 +2988,146 @@ max(fld)
1
drop table t1;
purge master logs before (select adddate(current_timestamp(), interval -4 day));
CREATE TABLE t1 (a int, b int);
CREATE TABLE t2 (c int, d int);
CREATE TABLE t3 (e int);
INSERT INTO t1 VALUES
(1,10), (2,10), (1,20), (2,20), (3,20), (2,30), (4,40);
INSERT INTO t2 VALUES
(2,10), (2,20), (4,10), (5,10), (3,20), (2,40);
INSERT INTO t3 VALUES (10), (30), (10), (20) ;
SELECT a, MAX(b), MIN(b) FROM t1 GROUP BY a;
a MAX(b) MIN(b)
1 20 10
2 30 10
3 20 20
4 40 40
SELECT * FROM t2;
c d
2 10
2 20
4 10
5 10
3 20
2 40
SELECT * FROM t3;
e
10
30
10
20
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)>20);
a
2
4
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)<d);
a
2
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)>d);
a
2
4
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE d >= SOME(SELECT e FROM t3 WHERE MAX(b)=e));
a
2
3
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e <= d));
a
2
3
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE d > SOME(SELECT e FROM t3 WHERE MAX(b)=e));
a
2
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e < d));
a
2
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE MIN(b) < d AND
EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e <= d));
a
2
SELECT a, SUM(a) FROM t1 GROUP BY a;
a SUM(a)
1 2
2 6
3 3
4 4
SELECT a FROM t1
WHERE EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) = c) GROUP BY a;
a
3
4
SELECT a FROM t1 GROUP BY a
HAVING EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) = c);
a
1
3
4
SELECT a FROM t1
WHERE a < 3 AND
EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) != c) GROUP BY a;
a
1
2
SELECT a FROM t1
WHERE a < 3 AND
EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) != c);
a
1
2
1
2
2
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a < ALL(SELECT t2.c FROM t2 GROUP BY t2.c
HAVING EXISTS(SELECT t3.e FROM t3 GROUP BY t3.e
HAVING SUM(t1.a+t2.c) < t3.e/4));
a
1
2
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.c FROM t2
WHERE EXISTS(SELECT t3.e FROM t3 GROUP BY t3.e
HAVING SUM(t1.a+t2.c) < t3.e/4));
a
4
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.c FROM t2
WHERE EXISTS(SELECT t3.e FROM t3
WHERE SUM(t1.a+t2.c) < t3.e/4));
ERROR HY000: Invalid use of group function
SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20;
ERROR HY000: Invalid use of group function
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(t2.c+SUM(t1.b)) > 20);
a
2
3
4
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(SUM(t1.b)) > 20);
a
2
4
SELECT t1.a, SUM(b) AS sum FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING t2.c+sum > 20);
a sum
2 60
3 20
4 40
DROP TABLE t1,t2,t3;

View file

@ -69,7 +69,7 @@ set group_concat_max_len = 1024;
# Test errors
--error 1111
select group_concat(sum(a)) from t1 group by grp;
select group_concat(sum(c)) from t1 group by grp;
--error 1054
select grp,group_concat(c order by 2) from t1 group by grp;

View file

@ -1968,3 +1968,86 @@ drop table t1;
purge master logs before (select adddate(current_timestamp(), interval -4 day));
#
# Test for bug #11762: subquery with an aggregate function in HAVING
#
CREATE TABLE t1 (a int, b int);
CREATE TABLE t2 (c int, d int);
CREATE TABLE t3 (e int);
INSERT INTO t1 VALUES
(1,10), (2,10), (1,20), (2,20), (3,20), (2,30), (4,40);
INSERT INTO t2 VALUES
(2,10), (2,20), (4,10), (5,10), (3,20), (2,40);
INSERT INTO t3 VALUES (10), (30), (10), (20) ;
SELECT a, MAX(b), MIN(b) FROM t1 GROUP BY a;
SELECT * FROM t2;
SELECT * FROM t3;
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)>20);
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)<d);
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2 WHERE MAX(b)>d);
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE d >= SOME(SELECT e FROM t3 WHERE MAX(b)=e));
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e <= d));
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE d > SOME(SELECT e FROM t3 WHERE MAX(b)=e));
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e < d));
SELECT a FROM t1 GROUP BY a
HAVING a IN (SELECT c FROM t2
WHERE MIN(b) < d AND
EXISTS(SELECT e FROM t3 WHERE MAX(b)=e AND e <= d));
SELECT a, SUM(a) FROM t1 GROUP BY a;
SELECT a FROM t1
WHERE EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) = c) GROUP BY a;
SELECT a FROM t1 GROUP BY a
HAVING EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) = c);
SELECT a FROM t1
WHERE a < 3 AND
EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) != c) GROUP BY a;
SELECT a FROM t1
WHERE a < 3 AND
EXISTS(SELECT c FROM t2 GROUP BY c HAVING SUM(a) != c);
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a < ALL(SELECT t2.c FROM t2 GROUP BY t2.c
HAVING EXISTS(SELECT t3.e FROM t3 GROUP BY t3.e
HAVING SUM(t1.a+t2.c) < t3.e/4));
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.c FROM t2
WHERE EXISTS(SELECT t3.e FROM t3 GROUP BY t3.e
HAVING SUM(t1.a+t2.c) < t3.e/4));
-- error 1111
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.c FROM t2
WHERE EXISTS(SELECT t3.e FROM t3
WHERE SUM(t1.a+t2.c) < t3.e/4));
-- error 1111
SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20;
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(t2.c+SUM(t1.b)) > 20);
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(SUM(t1.b)) > 20);
SELECT t1.a, SUM(b) AS sum FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING t2.c+sum > 20);
DROP TABLE t1,t2,t3;

View file

@ -1021,6 +1021,7 @@ void Item_name_const::print(String *str)
ref_pointer_array Pointer to array of reference fields
fields All fields in select
ref Pointer to item
skip_registered <=> the function must skipped for registered SUM items
NOTES
This is from split_sum_func2() for items that should be split
@ -1033,8 +1034,13 @@ void Item_name_const::print(String *str)
void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
List<Item> &fields, Item **ref)
List<Item> &fields, Item **ref,
bool skip_registered)
{
/* An item of type Item_sum is registered <=> ref_by != 0 */
if (type() == SUM_FUNC_ITEM && skip_registered &&
((Item_sum *) this)->ref_by)
return;
if (type() != SUM_FUNC_ITEM && with_sum_func)
{
/* Will split complicated items and ignore simple ones */
@ -3166,14 +3172,8 @@ resolve_ref_in_select_and_group(THD *thd, Item_ident *ref, SELECT_LEX *select)
{
for each outer query Q_k beginning from the inner-most one
{
if - Q_k is not a group query AND
- Q_k is not inside an aggregate function
OR
- Q_(k-1) is not in a HAVING or SELECT clause of Q_k
{
search for a column or derived column named col_ref_i
[in table T_j] in the FROM clause of Q_k;
}
search for a column or derived column named col_ref_i
[in table T_j] in the FROM clause of Q_k;
if such a column is not found
Search for a column or derived column named col_ref_i
@ -3253,18 +3253,11 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
place= prev_subselect_item->parsing_place;
/*
Check table fields only if the subquery is used somewhere out of
HAVING, or the outer SELECT does not use grouping (i.e. tables are
accessible).
In case of a view, find_field_in_tables() writes the pointer to
the found view field into '*reference', in other words, it
substitutes this Item_field with the found expression.
*/
if ((place != IN_HAVING ||
(!select->with_sum_func &&
select->group_list.elements == 0)) &&
(from_field= find_field_in_tables(thd, this,
if ((from_field= find_field_in_tables(thd, this,
outer_context->
first_name_resolution_table,
outer_context->
@ -3282,6 +3275,21 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
{
prev_subselect_item->used_tables_cache|= from_field->table->map;
prev_subselect_item->const_item_cache= 0;
if (thd->lex->in_sum_func &&
thd->lex->in_sum_func->nest_level ==
thd->lex->current_select->nest_level)
{
Item::Type type= (*reference)->type();
set_if_bigger(thd->lex->in_sum_func->max_arg_level,
select->nest_level);
set_field(from_field);
fixed= 1;
mark_as_dependent(thd, last_checked_context->select_lex,
context->select_lex, this,
((type == REF_ITEM || type == FIELD_ITEM) ?
(Item_ident*) (*reference) : 0));
return FALSE;
}
}
else
{
@ -3433,6 +3441,11 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
return FALSE;
set_field(from_field);
if (thd->lex->in_sum_func &&
thd->lex->in_sum_func->nest_level ==
thd->lex->current_select->nest_level)
set_if_bigger(thd->lex->in_sum_func->max_arg_level,
thd->lex->current_select->nest_level);
}
else if (thd->set_query_id && field->query_id != thd->query_id)
{
@ -4589,9 +4602,8 @@ bool Item_ref::fix_fields(THD *thd, Item **reference)
aggregate function.
*/
if (((*ref)->with_sum_func && name &&
(depended_from ||
!(current_sel->linkage != GLOBAL_OPTIONS_TYPE &&
current_sel->having_fix_field))) ||
!(current_sel->linkage != GLOBAL_OPTIONS_TYPE &&
current_sel->having_fix_field)) ||
!(*ref)->fixed)
{
my_error(ER_ILLEGAL_REFERENCE, MYF(0),

View file

@ -599,7 +599,7 @@ public:
List<Item> &fields) {}
/* Called for items that really have to be split */
void split_sum_func2(THD *thd, Item **ref_pointer_array, List<Item> &fields,
Item **ref);
Item **ref, bool skip_registered);
virtual bool get_date(TIME *ltime,uint fuzzydate);
virtual bool get_time(TIME *ltime);
virtual bool get_date_result(TIME *ltime,uint fuzzydate)

View file

@ -2666,7 +2666,7 @@ void Item_cond::split_sum_func(THD *thd, Item **ref_pointer_array,
List_iterator<Item> li(list);
Item *item;
while ((item= li++))
item->split_sum_func2(thd, ref_pointer_array, fields, li.ref());
item->split_sum_func2(thd, ref_pointer_array, fields, li.ref(), TRUE);
}

View file

@ -280,7 +280,7 @@ void Item_func::split_sum_func(THD *thd, Item **ref_pointer_array,
{
Item **arg, **arg_end;
for (arg= args, arg_end= args+arg_count; arg != arg_end ; arg++)
(*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg);
(*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE);
}

View file

@ -90,7 +90,7 @@ void Item_row::split_sum_func(THD *thd, Item **ref_pointer_array,
{
Item **arg, **arg_end;
for (arg= items, arg_end= items+arg_count; arg != arg_end ; arg++)
(*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg);
(*arg)->split_sum_func2(thd, ref_pointer_array, fields, arg, TRUE);
}

View file

@ -1864,7 +1864,7 @@ String *Item_func_elt::val_str(String *str)
void Item_func_make_set::split_sum_func(THD *thd, Item **ref_pointer_array,
List<Item> &fields)
{
item->split_sum_func2(thd, ref_pointer_array, fields, &item);
item->split_sum_func2(thd, ref_pointer_array, fields, &item, TRUE);
Item_str_func::split_sum_func(thd, ref_pointer_array, fields);
}

View file

@ -803,6 +803,7 @@ Item_in_subselect::single_value_transformer(JOIN *join,
!(select_lex->next_select()))
{
Item_sum_hybrid *item;
nesting_map save_allow_sum_func;
if (func->l_op())
{
/*
@ -828,6 +829,8 @@ Item_in_subselect::single_value_transformer(JOIN *join,
it.replace(item);
}
save_allow_sum_func= thd->lex->allow_sum_func;
thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
/*
Item_sum_(max|min) can't substitute other item => we can use 0 as
reference, also Item_sum_(max|min) can't be fixed after creation, so
@ -835,6 +838,7 @@ Item_in_subselect::single_value_transformer(JOIN *join,
*/
if (item->fix_fields(thd, 0))
DBUG_RETURN(RES_ERROR);
thd->lex->allow_sum_func= save_allow_sum_func;
/* we added aggregate function => we have to change statistic */
count_field_types(&join->tmp_table_param, join->all_fields, 0);

View file

@ -24,6 +24,234 @@
#include "mysql_priv.h"
#include "sql_select.h"
/*
Prepare an aggregate function item for checking context conditions
SYNOPSIS
init_sum_func_check()
thd reference to the thread context info
DESCRIPTION
The function initializes the members of the Item_sum object created
for a set function that are used to check validity of the set function
occurrence.
If the set function is not allowed in any subquery where it occurs
an error is reported immediately.
NOTES
This function is to be called for any item created for a set function
object when the traversal of trees built for expressions used in the query
is performed at the phase of context analysis. This function is to
be invoked at the descent of this traversal.
RETURN
TRUE if an error is reported
FALSE otherwise
*/
bool Item_sum::init_sum_func_check(THD *thd)
{
if (!thd->lex->allow_sum_func)
{
my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
MYF(0));
return TRUE;
}
/* Set a reference to the nesting set function if there is any */
in_sum_func= thd->lex->in_sum_func;
/* Save a pointer to object to be used in items for nested set functions */
thd->lex->in_sum_func= this;
nest_level= thd->lex->current_select->nest_level;
ref_by= 0;
aggr_level= -1;
max_arg_level= -1;
max_sum_func_level= -1;
return FALSE;
}
/*
Check constraints imposed on a usage of a set function
SYNOPSIS
check_sum_func()
thd reference to the thread context info
ref location of the pointer to this item in the embedding expression
DESCRIPTION
The method verifies whether context conditions imposed on a usage
of any set function are met for this occurrence.
It checks whether the set function occurs in the position where it
can be aggregated and, when it happens to occur in argument of another
set function, the method checks that these two functions are aggregated in
different subqueries.
If the context conditions are not met the method reports an error.
If the set function is aggregated in some outer subquery the method
adds it to the chain of items for such set functions that is attached
to the the st_select_lex structure for this subquery.
NOTES
This function is to be called for any item created for a set function
object when the traversal of trees built for expressions used in the query
is performed at the phase of context analysis. This function is to
be invoked at the ascent of this traversal.
IMPLEMENTATION
A number of designated members of the object are used to check the
conditions. They are specified in the comment before the Item_sum
class declaration.
Additionally a bitmap variable called allow_sum_func is employed.
It is included into the thd->lex structure.
The bitmap contains 1 at n-th position if the set function happens
to occur under a construct of the n-th level subquery where usage
of set functions are allowed (i.e either in the SELECT list or
in the HAVING clause of the corresponding subquery)
Consider the query:
SELECT SUM(t1.b) FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 WHERE AVG(t1.b) > 20) AND
t1.a > (SELECT MIN(t2.d) FROM t2);
allow_sum_func will contain:
for SUM(t1.b) - 1 at the first position
for AVG(t1.b) - 1 at the first position, 0 at the second position
for MIN(t2.d) - 1 at the first position, 1 at the second position.
RETURN
TRUE if an error is reported
FALSE otherwise
*/
bool Item_sum::check_sum_func(THD *thd, Item **ref)
{
bool invalid= FALSE;
nesting_map allow_sum_func= thd->lex->allow_sum_func;
/*
The value of max_arg_level is updated if an argument of the set function
contains a column reference resolved against a subquery whose level is
greater than the current value of max_arg_level.
max_arg_level cannot be greater than nest level.
nest level is always >= 0
*/
if (nest_level == max_arg_level)
{
/*
The function must be aggregated in the current subquery,
If it is there under a construct where it is not allowed
we report an error.
*/
invalid= !(allow_sum_func & (1 << max_arg_level));
}
else if (max_arg_level >= 0 || !(allow_sum_func & (1 << nest_level)))
{
/*
The set function can be aggregated only in outer subqueries.
Try to find a subquery where it can be aggregated;
If we fail to find such a subquery report an error.
*/
if (register_sum_func(thd, ref))
return TRUE;
invalid= aggr_level < 0 && !(allow_sum_func & (1 << nest_level));
}
if (!invalid && aggr_level < 0)
aggr_level= nest_level;
/*
By this moment we either found a subquery where the set function is
to be aggregated and assigned a value that is >= 0 to aggr_level,
or set the value of 'invalid' to TRUE to report later an error.
*/
/*
Additionally we have to check whether possible nested set functions
are acceptable here: they are not, if the level of aggregation of
some of them is less than aggr_level.
*/
invalid= aggr_level <= max_sum_func_level;
if (invalid)
{
my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
MYF(0));
return TRUE;
}
if (in_sum_func && in_sum_func->nest_level == nest_level)
{
/*
If the set function is nested adjust the value of
max_sum_func_level for the nesting set function.
*/
set_if_bigger(in_sum_func->max_sum_func_level, aggr_level);
}
thd->lex->in_sum_func= in_sum_func;
return FALSE;
}
/*
Attach a set function to the subquery where it must be aggregated
SYNOPSIS
register_sum_func()
thd reference to the thread context info
ref location of the pointer to this item in the embedding expression
DESCRIPTION
The function looks for an outer subquery where the set function must be
aggregated. If it finds such a subquery then aggr_level is set to
the nest level of this subquery and the item for the set function
is added to the list of set functions used in nested subqueries
inner_sum_func_list defined for each subquery. When the item is placed
there the field 'ref_by' is set to ref.
NOTES.
Now we 'register' only set functions that are aggregated in outer
subqueries. Actually it makes sense to link all set function for
a subquery in one chain. It would simplify the process of 'splitting'
for set functions.
RETURN
FALSE if the executes without failures (currently always)
TRUE otherwise
*/
bool Item_sum::register_sum_func(THD *thd, Item **ref)
{
SELECT_LEX *sl;
SELECT_LEX *aggr_sl= NULL;
nesting_map allow_sum_func= thd->lex->allow_sum_func;
for (sl= thd->lex->current_select->master_unit()->outer_select() ;
sl && sl->nest_level > max_arg_level;
sl= sl->master_unit()->outer_select() )
{
if (aggr_level < 0 && (allow_sum_func & (1 << sl->nest_level)))
{
/* Found the most nested subquery where the function can be aggregated */
aggr_level= sl->nest_level;
aggr_sl= sl;
}
}
if (sl && (allow_sum_func & (1 << sl->nest_level)))
{
/*
We reached the subquery of level max_arg_level and checked
that the function can be aggregated here.
The set function will be aggregated in this subquery.
*/
aggr_level= sl->nest_level;
aggr_sl= sl;
}
if (aggr_level >= 0)
{
ref_by= ref;
/* Add the object to the list of registered objects assigned to aggr_sl */
if (!aggr_sl->inner_sum_func_list)
next= this;
else
{
next= aggr_sl->inner_sum_func_list->next;
aggr_sl->inner_sum_func_list->next= this;
}
aggr_sl->inner_sum_func_list= this;
}
return FALSE;
}
Item_sum::Item_sum(List<Item> &list)
:arg_count(list.elements)
{
@ -197,13 +425,9 @@ Item_sum_num::fix_fields(THD *thd, Item **ref)
{
DBUG_ASSERT(fixed == 0);
if (!thd->allow_sum_func)
{
my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
MYF(0));
if (init_sum_func_check(thd))
return TRUE;
}
thd->allow_sum_func=0; // No included group funcs
decimals=0;
maybe_null=0;
for (uint i=0 ; i < arg_count ; i++)
@ -217,7 +441,10 @@ Item_sum_num::fix_fields(THD *thd, Item **ref)
max_length=float_length(decimals);
null_value=1;
fix_length_and_dec();
thd->allow_sum_func=1; // Allow group functions
if (check_sum_func(thd, ref))
return TRUE;
fixed= 1;
return FALSE;
}
@ -258,13 +485,9 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
DBUG_ASSERT(fixed == 0);
Item *item= args[0];
if (!thd->allow_sum_func)
{
my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
MYF(0));
if (init_sum_func_check(thd))
return TRUE;
}
thd->allow_sum_func=0; // No included group funcs
// 'item' can be changed during fix_fields
if (!item->fixed &&
@ -300,11 +523,14 @@ Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
result_field=0;
null_value=1;
fix_length_and_dec();
thd->allow_sum_func=1; // Allow group functions
if (item->type() == Item::FIELD_ITEM)
hybrid_field_type= ((Item_field*) item)->field->type();
else
hybrid_field_type= Item::field_type();
if (check_sum_func(thd, ref))
return TRUE;
fixed= 1;
return FALSE;
}
@ -2979,14 +3205,9 @@ Item_func_group_concat::fix_fields(THD *thd, Item **ref)
uint i; /* for loop variable */
DBUG_ASSERT(fixed == 0);
if (!thd->allow_sum_func)
{
my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
MYF(0));
if (init_sum_func_check(thd))
return TRUE;
}
thd->allow_sum_func= 0;
maybe_null= 1;
/*
@ -3008,8 +3229,11 @@ Item_func_group_concat::fix_fields(THD *thd, Item **ref)
result.set_charset(collation.collation);
result_field= 0;
null_value= 1;
thd->allow_sum_func= 1;
max_length= thd->variables.group_concat_max_len;
if (check_sum_func(thd, ref))
return TRUE;
fixed= 1;
return FALSE;
}

View file

@ -23,6 +23,200 @@
#include <my_tree.h>
/*
Class Item_sum is the base class used for special expressions that SQL calls
'set functions'. These expressions are formed with the help of aggregate
functions such as SUM, MAX, GROUP_CONCAT etc.
GENERAL NOTES
A set function can be used not in any position where an expression is
accepted. There are some quite explicable restrictions for the usage of
set functions.
In the query:
SELECT AVG(b) FROM t1 WHERE SUM(b) > 20 GROUP by a
the usage of the set function AVG(b) is legal, while the usage of SUM(b)
is illegal. A WHERE condition must contain expressions that can be
evaluated for each row of the table. Yet the expression SUM(b) can be
evaluated only for each group of rows with the same value of column a.
In the query:
SELECT AVG(b) FROM t1 WHERE c > 30 GROUP BY a HAVING SUM(b) > 20
both set function expressions AVG(b) and SUM(b) are legal.
We can say that in a query without nested selects an occurrence of a
set function in an expression of the SELECT list or/and in the HAVING
clause is legal, while in the WHERE clause it's illegal.
The general rule to detect whether a set function is legal in a query with
nested subqueries is much more complicated.
Consider the the following query:
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL (SELECT t2.c FROM t2 WHERE SUM(t1.b) < t2.c).
The set function SUM(b) is used here in the WHERE clause of the subquery.
Nevertheless it is legal since it is under the HAVING clause of the query
to which this function relates. The expression SUM(t1.b) is evaluated
for each group defined in the main query, not for groups of the subquery.
The problem of finding the query where to aggregate a particular
set function is not so simple is it seems to be.
In the query:
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.c FROM t2 GROUP BY t2.c
HAVING SUM(t1.a) < t2.c)
the set function can be evaluated for both outer and inner selects.
If we evaluate SUM(t.a) for the outer query then we get the value of t.a
multiplied by the cardinality of a group in table t1. In this case
in each correlated subquery SUM(t1.a) is used as a constant. But we also
can evaluate SUM(t.a) for the inner query. In this case t.a will be a
constant for each correlated subquery and summation is performed
for each group of table t2.
(Here it makes sense to remind that the query
SELECT c FROM t GROUP BY a HAVING SUM(1) < a
is quite legal in our SQL).
So depending on to what query we assign the set function we
can get different result sets.
The general rule to detect the query where a set function is to be
evaluated can be formulated as follows.
Consider a set function S(E) where E is an expression with occurrences
of column references C1, ..., CN. Resolve these references against
subqueries whose subexpression the set function S(E) is. Let Q be the
most inner subquery of those subqueries. (It should be noted here that S(E)
in no way can be evaluated in the subquery embedding the subquery Q.)
If S(E) is used in a construct of Q where set function are allowed then
we evaluate S(E) in Q.
Otherwise we look for a most inner subquery containing S(E) of those where
usage of S(E) is allowed.
Let's demonstrate how this rule is applied to the following queries.
1. SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
HAVING SUM(t1.a+t2.b) < t3.c))
For this query the set function SUM(t1.a+t2.b) depends on t1.a and t2.b
with t1.a defined in the most outer query, and t2.b defined for its
subquery. The set function is in the HAVING clause of the subquery and can
be evaluated in this subquery.
2. SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.b FROM t2
WHERE t2.b > ALL (SELECT t3.c FROM t3 GROUP BY t3.c
HAVING SUM(t1.a+t2.b) < t3.c))
Here the set function SUM(t1.a+t2.b)is in the WHERE clause of the second
subquery - the most upper subquery where t1.a and t2.b are defined.
If we evaluate the function in this subquery we violate the context rules.
So we evaluate the function in the third subquery where it is used under
the HAVING clause.
3. SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.b FROM t2
WHERE t2.b > ALL (SELECT t3.c FROM t3
WHERE SUM(t1.a+t2.b) < t3.c))
In this query evaluation of SUM(t1.a+t2.b) is not legal neither in the second
nor in the third subqueries. So this query is invalid.
Mostly set functions cannot be nested. In the query
SELECT t1.a from t1 GROUP BY t1.a HAVING AVG(SUM(t1.b)) > 20
the expression SUM(b) is not acceptable, though it is under a HAVING clause.
It is acceptable in the query:
SELECT t.1 FROM t1 GROUP BY t1.a HAVING SUM(t1.b) > 20.
An argument of a set function does not have to be a reference to a table
column as we saw it in examples above. This can be a more complex expression
SELECT t1.a FROM t1 GROUP BY t1.a HAVING SUM(t1.b+1) > 20.
The expression SUM(t1.b+1) has a very clear semantics in this context:
we sum up the values of t1.b+1 where t1.b varies for all values within a
group of rows that contain the same t1.a value.
A set function for an outer query yields a constant within a subquery. So
the semantics of the query
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(t2.c+SUM(t1.b)) > 20)
is still clear too. For a group of the rows with the same t1.a values we
calculate the value of SUM(t1.b). This value 's' is substituted in the
the subquery:
SELECT t2.c FROM t2 GROUP BY t2.c HAVING AVG(t2.c+s)
than returns some result set.
By the same reason the following query with a subquery
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 GROUP BY t2.c
HAVING AVG(SUM(t1.b)) > 20)
is also acceptable.
IMPLEMENTATION NOTES
Three methods were added to the class to check the constraints specified
in the previous section. This methods utilize several new members.
The field 'nest_level' contains the number of the level for the subquery
containing the set function. The main SELECT is of level 0, its subqueries
are of levels 1, the subqueries of the latters are of level 2 and so on.
The field 'aggr_level' is to contain the nest level of the subquery
where the set function is aggregated.
The field 'max_arg_level' is for the maximun of the nest levels of the
unbound column references occurred in the set function. A column reference
is unbound within a set function if it is not bound by any subquery
used as a subexpression in this function. A column reference is bound by
a subquery if it is a reference to the column by which the aggregation
of some set function that is used in the subquery is calculated.
For the set function used in the query
SELECT t1.a FROM t1 GROUP BY t1.a
HAVING t1.a > ALL(SELECT t2.b FROM t2 GROUP BY t2.b
HAVING t2.b > ALL(SELECT t3.c FROM t3 GROUP BY t3.c
HAVING SUM(t1.a+t2.b) < t3.c))
the value of max_arg_level is equal to 1 since t1.a is bound in the main
query, and t2.b is bound by the first subquery whose nest level 1.
Obviously a set function cannot be aggregated in the subquery whose
nest level is less than max_arg_level. (Yet it can be aggregated in the
subqueries whose nest level is greater than max_arg_level.)
In the query
SELECT t.a FROM t1 HAVING AVG(t1.a+(SELECT MIN(t2.c) FROM t2))
the value of the max_arg_level for the AVG set function is 0 since
the reference t2.c is bound in the subquery.
The field 'max_sum_func_level' is to contain the maximum of the
nest levels of the set functions that are used as subexpressions of
the arguments of the given set function, but not aggregated in any
subquery within this set function. A nested set function s1 can be
used within set function s0 only if s1.max_sum_func_level <
s0.max_sum_func_level. Set function s1 is considered as nested
for set function s0 if s1 is not calculated in any subquery
within s0.
A set function that as a subexpression in an argument of another set
function refers to the latter via the field 'in_sum_func'.
The condition imposed on the usage of set functions are checked when
we traverse query subexpressions with the help of recursive method
fix_fields. When we apply this method to an object of the class
Item_sum, first, on the descent, we call the method init_sum_func_check
that initialize members used at checking. Then, on the ascent, we
call the method check_sum_func that validates the set function usage
and reports an error if it is illegal.
The method register_sum_func serves to link the items for the set functions
that are aggregated in the embedding (sub)queries. Circular chains of such
functions are attached to the corresponding st_select_lex structures
through the field inner_sum_func_list.
Exploiting the fact that the members mentioned above are used in one
recursive function we could have allocated them on the thread stack.
Yet we don't do it now.
We assume that the nesting level of subquries does not exceed 127.
TODO: to catch queries where the limit is exceeded to make the
code clean here.
*/
class Item_sum :public Item_result_field
{
public:
@ -33,7 +227,14 @@ public:
};
Item **args, *tmp_args[2];
Item **ref_by; /* pointer to a ref to the object used to register it */
Item_sum *next; /* next in the circular chain of registered objects */
uint arg_count;
Item_sum *in_sum_func; /* embedding set function if any */
int8 nest_level; /* number of the nesting level of the set function */
int8 aggr_level; /* nesting level of the aggregating subquery */
int8 max_arg_level; /* max level of unbound column references */
int8 max_sum_func_level;/* max level of aggregation for embedded functions */
bool quick_group; /* If incremental update of fields */
void mark_as_sum_func();
@ -111,6 +312,9 @@ public:
virtual Field *create_tmp_field(bool group, TABLE *table,
uint convert_blob_length);
bool walk (Item_processor processor, byte *argument);
bool init_sum_func_check(THD *thd);
bool check_sum_func(THD *thd, Item **ref);
bool register_sum_func(THD *thd, Item **ref);
};

View file

@ -44,6 +44,7 @@
typedef ulonglong table_map; /* Used for table bits in join */
typedef Bitmap<64> key_map; /* Used for finding keys */
typedef ulong key_part_map; /* Used for finding key parts */
typedef ulong nesting_map; /* Used for flags of nesting constructs */
/* query_id */
typedef ulonglong query_id_t;

View file

@ -4267,11 +4267,13 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
{
reg2 Item *item;
bool save_set_query_id= thd->set_query_id;
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
List_iterator<Item> it(fields);
DBUG_ENTER("setup_fields");
thd->set_query_id=set_query_id;
thd->allow_sum_func= allow_sum_func;
if (allow_sum_func)
thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
thd->where="field list";
/*
@ -4294,6 +4296,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
if (!item->fixed && item->fix_fields(thd, it.ref()) ||
(item= *(it.ref()))->check_cols(1))
{
thd->lex->allow_sum_func= save_allow_sum_func;
thd->set_query_id= save_set_query_id;
DBUG_RETURN(TRUE); /* purecov: inspected */
}
@ -4304,6 +4307,7 @@ bool setup_fields(THD *thd, Item **ref_pointer_array,
item->split_sum_func(thd, ref_pointer_array, *sum_func_list);
thd->used_tables|= item->used_tables();
}
thd->lex->allow_sum_func= save_allow_sum_func;
thd->set_query_id= save_set_query_id;
DBUG_RETURN(test(thd->net.report_error));
}

View file

@ -1560,7 +1560,6 @@ Statement::Statement(enum enum_state state_arg, ulong id_arg,
:Query_arena(&main_mem_root, state_arg),
id(id_arg),
set_query_id(1),
allow_sum_func(0),
lex(&main_lex),
query(0),
query_length(0),
@ -1581,7 +1580,6 @@ void Statement::set_statement(Statement *stmt)
{
id= stmt->id;
set_query_id= stmt->set_query_id;
allow_sum_func= stmt->allow_sum_func;
lex= stmt->lex;
query= stmt->query;
query_length= stmt->query_length;

View file

@ -785,19 +785,6 @@ public:
field list can not contain duplicates.
*/
bool set_query_id;
/*
This variable is used in post-parse stage to declare that sum-functions,
or functions which have sense only if GROUP BY is present, are allowed.
For example in queries
SELECT MIN(i) FROM foo
SELECT GROUP_CONCAT(a, b, MIN(i)) FROM ... GROUP BY ...
MIN(i) have no sense.
Though it's grammar-related issue, it's hard to catch it out during the
parse stage because GROUP BY clause goes in the end of query. This
variable is mainly used in setup_fields/fix_fields.
See item_sum.cc for details.
*/
bool allow_sum_func;
LEX_STRING name; /* name for named prepared statements */
LEX *lex; // parse tree descriptor

View file

@ -313,7 +313,7 @@ bool mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds)
SELECT_LEX *select_lex= &thd->lex->select_lex;
DBUG_ENTER("mysql_prepare_delete");
thd->allow_sum_func= 0;
thd->lex->allow_sum_func= 0;
if (setup_tables(thd, &thd->lex->select_lex.context,
&thd->lex->select_lex.top_join_list,
table_list, conds, &select_lex->leaf_tables,

View file

@ -181,6 +181,9 @@ void lex_start(THD *thd, uchar *buf,uint length)
lex->sroutines_list.empty();
lex->sroutines_list_own_last= lex->sroutines_list.next;
lex->sroutines_list_own_elements= 0;
lex->nest_level=0 ;
lex->allow_sum_func= 0;
lex->in_sum_func= NULL;
DBUG_VOID_RETURN;
}
@ -1138,6 +1141,7 @@ void st_select_lex::init_query()
first_cond_optimization= 1;
parsing_place= NO_MATTER;
exclude_from_table_unique_test= no_wrap_view_item= FALSE;
nest_level= 0;
link_next= 0;
}
@ -1157,6 +1161,7 @@ void st_select_lex::init_select()
interval_list.empty();
use_index.empty();
ftfunc_list_alloc.empty();
inner_sum_func_list= 0;
ftfunc_list= &ftfunc_list_alloc;
linkage= UNSPECIFIED_TYPE;
order_list.elements= 0;

View file

@ -527,6 +527,8 @@ public:
ulong table_join_options;
uint in_sum_expr;
uint select_number; /* number of select (used for EXPLAIN) */
int nest_level; /* nesting level of select */
Item_sum *inner_sum_func_list; /* list of sum func in nested selects */
uint with_wild; /* item list contain '*' */
bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */
/* TRUE when having fix field called in processing of this SELECT */
@ -769,12 +771,23 @@ typedef struct st_lex
SQL_LIST proc_list, auxilliary_table_list, save_list;
create_field *last_field;
Item_sum *in_sum_func;
udf_func udf;
HA_CHECK_OPT check_opt; // check/repair options
HA_CREATE_INFO create_info;
LEX_MASTER_INFO mi; // used by CHANGE MASTER
USER_RESOURCES mqh;
ulong type;
/*
This variable is used in post-parse stage to declare that sum-functions,
or functions which have sense only if GROUP BY is present, are allowed.
For example in a query
SELECT ... FROM ...WHERE MIN(i) == 1 GROUP BY ... HAVING MIN(i) > 2
MIN(i) in the WHERE clause is not allowed in the opposite to MIN(i)
in the HAVING clause. Due to possible nesting of select construct
the variable can contain 0 or 1 for each nest level.
*/
nesting_map allow_sum_func;
enum_sql_command sql_command, orig_sql_command;
thr_lock_type lock_option;
enum SSL_type ssl_type; /* defined in violite.h */
@ -793,6 +806,7 @@ typedef struct st_lex
uint grant, grant_tot_col, which_columns;
uint fk_delete_opt, fk_update_opt, fk_match_option;
uint slave_thd_opt, start_transaction_opt;
int nest_level;
/*
In LEX representing update which were transformed to multi-update
stores total number of tables. For LEX representing multi-delete

View file

@ -5340,6 +5340,8 @@ mysql_new_select(LEX *lex, bool move_down)
select_lex->parent_lex= lex; /* Used in init_query. */
select_lex->init_query();
select_lex->init_select();
lex->nest_level++;
select_lex->nest_level= lex->nest_level;
/*
Don't evaluate this subquery during statement prepare even if
it's a constant one. The flag is switched off in the end of

View file

@ -2125,7 +2125,8 @@ void reinit_stmt_before_use(THD *thd, LEX *lex)
lex->result->cleanup();
lex->result->set_thd(thd);
}
thd->allow_sum_func= 0;
lex->allow_sum_func= 0;
lex->in_sum_func= NULL;
DBUG_VOID_RETURN;
}

View file

@ -270,21 +270,20 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array,
ORDER *order,
ORDER *group, bool *hidden_group_fields)
{
bool save_allow_sum_func;
int res;
nesting_map save_allow_sum_func=thd->lex->allow_sum_func ;
DBUG_ENTER("setup_without_group");
save_allow_sum_func= thd->allow_sum_func;
thd->allow_sum_func= 0;
thd->lex->allow_sum_func&= ~(1 << thd->lex->current_select->nest_level);
res= setup_conds(thd, tables, leaves, conds);
thd->allow_sum_func= save_allow_sum_func;
thd->lex->allow_sum_func|= 1 << thd->lex->current_select->nest_level;
res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
order);
thd->allow_sum_func= 0;
thd->lex->allow_sum_func&= ~(1 << thd->lex->current_select->nest_level);
res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
group, hidden_group_fields);
thd->allow_sum_func= save_allow_sum_func;
thd->lex->allow_sum_func= save_allow_sum_func;
DBUG_RETURN(res);
}
@ -351,8 +350,9 @@ JOIN::prepare(Item ***rref_pointer_array,
if (having)
{
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
thd->where="having clause";
thd->allow_sum_func=1;
thd->lex->allow_sum_func|= 1 << select_lex_arg->nest_level;
select_lex->having_fix_field= 1;
bool having_fix_rc= (!having->fixed &&
(having->fix_fields(thd, &having) ||
@ -362,6 +362,18 @@ JOIN::prepare(Item ***rref_pointer_array,
DBUG_RETURN(-1); /* purecov: inspected */
if (having->with_sum_func)
having->split_sum_func(thd, ref_pointer_array, all_fields);
thd->lex->allow_sum_func= save_allow_sum_func;
}
if (select_lex->inner_sum_func_list)
{
Item_sum *end=select_lex->inner_sum_func_list;
Item_sum *item_sum= end;
do
{
item_sum= item_sum->next;
item_sum->split_sum_func2(thd, ref_pointer_array,
all_fields, item_sum->ref_by, FALSE);
} while (item_sum != end);
}
if (!thd->lex->view_prepare_mode)
@ -5165,7 +5177,9 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
join->const_table_map|=RAND_TABLE_BIT;
{ // Check const tables
COND *const_cond=
make_cond_for_table(cond,join->const_table_map,(table_map) 0);
make_cond_for_table(cond,
join->const_table_map,
(table_map) 0);
DBUG_EXECUTE("where",print_where(const_cond,"constants"););
for (JOIN_TAB *tab= join->join_tab+join->const_tables;
tab < join->join_tab+join->tables ; tab++)

View file

@ -571,7 +571,7 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list,
bzero((char*) &tables,sizeof(tables)); // For ORDER BY
tables.table= table;
tables.alias= table_list->alias;
thd->allow_sum_func= 0;
thd->lex->allow_sum_func= 0;
if (setup_tables(thd, &select_lex->context, &select_lex->top_join_list,
table_list, conds, &select_lex->leaf_tables,

View file

@ -8927,6 +8927,7 @@ subselect_end:
LEX *lex=Lex;
lex->pop_context();
lex->current_select = lex->current_select->return_after_parsing();
lex->nest_level--;
};
opt_view_list: