BUG#27927:Partition pruning not optimal with TO_DAYS and YEAR functions

- Introduced val_int_endpoint() function which converts between func 
  argument intervals and func value intervals for monotonic functions.
- Made partition interval analyzer use part_expr->val_int_endpoint()
  to check if the edge values should be included.


mysql-test/r/partition_pruning.result:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Testcase
mysql-test/t/partition_pruning.test:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Testcase
sql/item.cc:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Added Item_field::val_int_endpoint() implementation
sql/item.h:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Added Item::val_int_endpoint() which converts intervals from argument
    space to function value space for unary monotonic functions.
sql/item_timefunc.cc:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Added val_int_endpoint() for TO_DAYS and YEAR functions.
sql/item_timefunc.h:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Added val_int_endpoint() for TO_DAYS and YEAR functions.
sql/partition_info.h:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Removed partition_info::range_analysis_include_bounds as it is no longer 
    needed.
sql/sql_partition.cc:
  BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions
  - Make partition interval analyzer use part_expr->val_int_endpoint() to 
    check if the edge values should be included.
This commit is contained in:
unknown 2007-09-14 14:18:42 +04:00
parent 9d01633571
commit 4aaabb06c0
8 changed files with 182 additions and 27 deletions

View file

@ -911,3 +911,31 @@ explain partitions select * from t1 where a>-2 and a <=0;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p3 ALL NULL NULL NULL NULL 4 Using where
drop table t1;
CREATE TABLE t1 ( recdate DATETIME NOT NULL )
PARTITION BY RANGE( TO_DAYS(recdate) ) (
PARTITION p0 VALUES LESS THAN ( TO_DAYS('2007-03-08') ),
PARTITION p1 VALUES LESS THAN ( TO_DAYS('2007-04-01') )
);
INSERT INTO t1 VALUES ('2007-03-01 12:00:00');
INSERT INTO t1 VALUES ('2007-03-07 12:00:00');
INSERT INTO t1 VALUES ('2007-03-08 12:00:00');
INSERT INTO t1 VALUES ('2007-03-15 12:00:00');
must use p0 only:
explain partitions select * from t1 where recdate < '2007-03-08 00:00:00';
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where
drop table t1;
CREATE TABLE t1 ( recdate DATETIME NOT NULL )
PARTITION BY RANGE( YEAR(recdate) ) (
PARTITION p0 VALUES LESS THAN (2006),
PARTITION p1 VALUES LESS THAN (2007)
);
INSERT INTO t1 VALUES ('2005-03-01 12:00:00');
INSERT INTO t1 VALUES ('2005-03-01 12:00:00');
INSERT INTO t1 VALUES ('2006-03-01 12:00:00');
INSERT INTO t1 VALUES ('2006-03-01 12:00:00');
must use p0 only:
explain partitions select * from t1 where recdate < '2006-01-01 00:00:00';
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where
drop table t1;

View file

@ -761,3 +761,34 @@ insert into t1 values (-15),(-5),(5),(15),(-15),(-5),(5),(15);
explain partitions select * from t1 where a>-2 and a <=0;
drop table t1;
#
# BUG#27927 Partition pruning not optimal with TO_DAYS function
#
CREATE TABLE t1 ( recdate DATETIME NOT NULL )
PARTITION BY RANGE( TO_DAYS(recdate) ) (
PARTITION p0 VALUES LESS THAN ( TO_DAYS('2007-03-08') ),
PARTITION p1 VALUES LESS THAN ( TO_DAYS('2007-04-01') )
);
INSERT INTO t1 VALUES ('2007-03-01 12:00:00');
INSERT INTO t1 VALUES ('2007-03-07 12:00:00');
INSERT INTO t1 VALUES ('2007-03-08 12:00:00');
INSERT INTO t1 VALUES ('2007-03-15 12:00:00');
-- echo must use p0 only:
explain partitions select * from t1 where recdate < '2007-03-08 00:00:00';
drop table t1;
CREATE TABLE t1 ( recdate DATETIME NOT NULL )
PARTITION BY RANGE( YEAR(recdate) ) (
PARTITION p0 VALUES LESS THAN (2006),
PARTITION p1 VALUES LESS THAN (2007)
);
INSERT INTO t1 VALUES ('2005-03-01 12:00:00');
INSERT INTO t1 VALUES ('2005-03-01 12:00:00');
INSERT INTO t1 VALUES ('2006-03-01 12:00:00');
INSERT INTO t1 VALUES ('2006-03-01 12:00:00');
-- echo must use p0 only:
explain partitions select * from t1 where recdate < '2006-01-01 00:00:00';
drop table t1;

View file

@ -2069,6 +2069,11 @@ Item *Item_field::get_tmp_table_item(THD *thd)
return new_item;
}
longlong Item_field::val_int_endpoint(bool left_endp, bool *incl_endp)
{
longlong res= val_int();
return null_value? LONGLONG_MIN : res;
}
/*
Create an item from a string we KNOW points to a valid longlong

View file

@ -569,6 +569,43 @@ public:
virtual enum_monotonicity_info get_monotonicity_info() const
{ return NON_MONOTONIC; }
/*
Convert "func_arg $CMP$ const" half-interval into "FUNC(func_arg) $CMP2$ const2"
SYNOPSIS
val_int_endpoint()
left_endp FALSE <=> The interval is "x < const" or "x <= const"
TRUE <=> The interval is "x > const" or "x >= const"
incl_endp IN TRUE <=> the comparison is '<' or '>'
FALSE <=> the comparison is '<=' or '>='
OUT The same but for the "F(x) $CMP$ F(const)" comparison
DESCRIPTION
This function is defined only for unary monotonic functions. The caller
supplies the source half-interval
x $CMP$ const
The value of const is supplied implicitly as the value this item's
argument, the form of $CMP$ comparison is specified through the
function's arguments. The calle returns the result interval
F(x) $CMP2$ F(const)
passing back F(const) as the return value, and the form of $CMP2$
through the out parameter. NULL values are assumed to be comparable and
be less than any non-NULL values.
RETURN
The output range bound, which equal to the value of val_int()
- If the value of the function is NULL then the bound is the
smallest possible value of LONGLONG_MIN
*/
virtual longlong val_int_endpoint(bool left_endp, bool *incl_endp)
{ DBUG_ASSERT(0); }
/* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */
/*
Return double precision floating point representation of item.
@ -1401,6 +1438,7 @@ public:
{
return MONOTONIC_STRICT_INCREASING;
}
longlong val_int_endpoint(bool left_endp, bool *incl_endp);
Field *get_tmp_table_field() { return result_field; }
Field *tmp_table_field(TABLE *t_arg) { return result_field; }
bool get_date(MYSQL_TIME *ltime,uint fuzzydate);

View file

@ -962,6 +962,44 @@ enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const
}
longlong Item_func_to_days::val_int_endpoint(bool left_endp, bool *incl_endp)
{
DBUG_ASSERT(fixed == 1);
MYSQL_TIME ltime;
longlong res;
if (get_arg0_date(&ltime, TIME_NO_ZERO_DATE))
{
/* got NULL, leave the incl_endp intact */
return LONGLONG_MIN;
}
res=(longlong) calc_daynr(ltime.year,ltime.month,ltime.day);
if (args[0]->field_type() == MYSQL_TYPE_DATE)
{
// TO_DAYS() is strictly monotonic for dates, leave incl_endp intact
return res;
}
/*
Handle the special but practically useful case of datetime values that
point to day bound ("strictly less" comparison stays intact):
col < '2007-09-15 00:00:00' -> TO_DAYS(col) < TO_DAYS('2007-09-15')
which is different from the general case ("strictly less" changes to
"less or equal"):
col < '2007-09-15 12:34:56' -> TO_DAYS(col) <= TO_DAYS('2007-09-15')
*/
if (!left_endp && !(ltime.hour || ltime.minute || ltime.second ||
ltime.second_part))
; /* do nothing */
else
*incl_endp= TRUE;
return res;
}
longlong Item_func_dayofyear::val_int()
{
DBUG_ASSERT(fixed == 1);
@ -1152,7 +1190,7 @@ longlong Item_func_year::val_int()
Get information about this Item tree monotonicity
SYNOPSIS
Item_func_to_days::get_monotonicity_info()
Item_func_year::get_monotonicity_info()
DESCRIPTION
Get information about monotonicity of the function represented by this item
@ -1171,6 +1209,37 @@ enum_monotonicity_info Item_func_year::get_monotonicity_info() const
return NON_MONOTONIC;
}
longlong Item_func_year::val_int_endpoint(bool left_endp, bool *incl_endp)
{
DBUG_ASSERT(fixed == 1);
MYSQL_TIME ltime;
if (get_arg0_date(&ltime, TIME_FUZZY_DATE))
{
/* got NULL, leave the incl_endp intact */
return LONGLONG_MIN;
}
/*
Handle the special but practically useful case of datetime values that
point to year bound ("strictly less" comparison stays intact) :
col < '2007-01-01 00:00:00' -> YEAR(col) < 2007
which is different from the general case ("strictly less" changes to
"less or equal"):
col < '2007-09-15 23:00:00' -> YEAR(col) <= 2007
*/
if (!left_endp && ltime.day == 1 && ltime.month == 1 &&
!(ltime.hour || ltime.minute || ltime.second || ltime.second_part))
; /* do nothing */
else
*incl_endp= TRUE;
return ltime.year;
}
longlong Item_func_unix_timestamp::val_int()
{
MYSQL_TIME ltime;

View file

@ -68,6 +68,7 @@ public:
maybe_null=1;
}
enum_monotonicity_info get_monotonicity_info() const;
longlong val_int_endpoint(bool left_endp, bool *incl_endp);
bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
};
@ -248,6 +249,7 @@ public:
longlong val_int();
const char *func_name() const { return "year"; }
enum_monotonicity_info get_monotonicity_info() const;
longlong val_int_endpoint(bool left_endp, bool *incl_endp);
void fix_length_and_dec()
{
decimals=0;

View file

@ -139,20 +139,6 @@ public:
*/
get_partitions_in_range_iter get_subpart_iter_for_interval;
/*
Valid iff
get_part_iter_for_interval=get_part_iter_for_interval_via_walking:
controls how we'll process "field < C" and "field > C" intervals.
If the partitioning function F is strictly increasing, then for any x, y
"x < y" => "F(x) < F(y)" (*), i.e. when we get interval "field < C"
we can perform partition pruning on the equivalent "F(field) < F(C)".
If the partitioning function not strictly increasing (it is simply
increasing), then instead of (*) we get "x < y" => "F(x) <= F(y)"
i.e. for interval "field < C" we can perform partition pruning for
"F(field) <= F(C)".
*/
bool range_analysis_include_bounds;
/********************************************
* INTERVAL ANALYSIS ENDS
********************************************/

View file

@ -2743,7 +2743,8 @@ uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
longlong list_value;
/* Get the partitioning function value for the endpoint */
longlong part_func_value= part_val_int(part_info->part_expr);
longlong part_func_value=
part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint);
bool unsigned_flag= part_info->part_expr->unsigned_flag;
DBUG_ENTER("get_list_array_idx_for_endpoint");
@ -2887,7 +2888,9 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
uint max_partition= part_info->no_parts - 1;
uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
/* Get the partitioning function value for the endpoint */
longlong part_func_value= part_val_int(part_info->part_expr);
longlong part_func_value=
part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint);
bool unsigned_flag= part_info->part_expr->unsigned_flag;
DBUG_ENTER("get_partition_id_range_for_endpoint");
@ -6590,8 +6593,6 @@ void make_used_partitions_str(partition_info *part_info, String *parts_str)
#ifdef WITH_PARTITION_STORAGE_ENGINE
static void set_up_range_analysis_info(partition_info *part_info)
{
enum_monotonicity_info minfo;
/* Set the catch-all default */
part_info->get_part_iter_for_interval= NULL;
part_info->get_subpart_iter_for_interval= NULL;
@ -6603,11 +6604,8 @@ static void set_up_range_analysis_info(partition_info *part_info)
switch (part_info->part_type) {
case RANGE_PARTITION:
case LIST_PARTITION:
minfo= part_info->part_expr->get_monotonicity_info();
if (minfo != NON_MONOTONIC)
if (part_info->part_expr->get_monotonicity_info() != NON_MONOTONIC)
{
part_info->range_analysis_include_bounds=
test(minfo == MONOTONIC_INCREASING);
part_info->get_part_iter_for_interval=
get_part_iter_for_interval_via_mapping;
goto setup_subparts;
@ -6775,8 +6773,7 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info,
index-in-ordered-array-of-list-constants (for LIST) space.
*/
store_key_image_to_rec(field, min_value, field_len);
bool include_endp= part_info->range_analysis_include_bounds ||
!test(flags & NEAR_MIN);
bool include_endp= !test(flags & NEAR_MIN);
part_iter->part_nums.start= get_endpoint(part_info, 1, include_endp);
part_iter->part_nums.cur= part_iter->part_nums.start;
if (part_iter->part_nums.start == max_endpoint_val)
@ -6790,8 +6787,7 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info,
else
{
store_key_image_to_rec(field, max_value, field_len);
bool include_endp= part_info->range_analysis_include_bounds ||
!test(flags & NEAR_MAX);
bool include_endp= !test(flags & NEAR_MAX);
part_iter->part_nums.end= get_endpoint(part_info, 0, include_endp);
if (part_iter->part_nums.start == part_iter->part_nums.end &&
!part_iter->ret_null_part)