From 4aaabb06c011e20054613bff225823b3d66a135e Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 14 Sep 2007 14:18:42 +0400 Subject: [PATCH] BUG#27927:Partition pruning not optimal with TO_DAYS and YEAR functions - Introduced val_int_endpoint() function which converts between func argument intervals and func value intervals for monotonic functions. - Made partition interval analyzer use part_expr->val_int_endpoint() to check if the edge values should be included. mysql-test/r/partition_pruning.result: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Testcase mysql-test/t/partition_pruning.test: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Testcase sql/item.cc: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Added Item_field::val_int_endpoint() implementation sql/item.h: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Added Item::val_int_endpoint() which converts intervals from argument space to function value space for unary monotonic functions. sql/item_timefunc.cc: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Added val_int_endpoint() for TO_DAYS and YEAR functions. sql/item_timefunc.h: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Added val_int_endpoint() for TO_DAYS and YEAR functions. sql/partition_info.h: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Removed partition_info::range_analysis_include_bounds as it is no longer needed. sql/sql_partition.cc: BUG#27927: Partition pruning not optimal with TO_DAYS and YEAR functions - Make partition interval analyzer use part_expr->val_int_endpoint() to check if the edge values should be included. --- mysql-test/r/partition_pruning.result | 28 +++++++++++ mysql-test/t/partition_pruning.test | 31 ++++++++++++ sql/item.cc | 5 ++ sql/item.h | 38 ++++++++++++++ sql/item_timefunc.cc | 71 ++++++++++++++++++++++++++- sql/item_timefunc.h | 2 + sql/partition_info.h | 14 ------ sql/sql_partition.cc | 20 +++----- 8 files changed, 182 insertions(+), 27 deletions(-) diff --git a/mysql-test/r/partition_pruning.result b/mysql-test/r/partition_pruning.result index 776e6f3a15a..2dc37f7ed43 100644 --- a/mysql-test/r/partition_pruning.result +++ b/mysql-test/r/partition_pruning.result @@ -911,3 +911,31 @@ explain partitions select * from t1 where a>-2 and a <=0; id select_type table partitions type possible_keys key key_len ref rows Extra 1 SIMPLE t1 p3 ALL NULL NULL NULL NULL 4 Using where drop table t1; +CREATE TABLE t1 ( recdate DATETIME NOT NULL ) +PARTITION BY RANGE( TO_DAYS(recdate) ) ( +PARTITION p0 VALUES LESS THAN ( TO_DAYS('2007-03-08') ), +PARTITION p1 VALUES LESS THAN ( TO_DAYS('2007-04-01') ) +); +INSERT INTO t1 VALUES ('2007-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-07 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-08 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-15 12:00:00'); +must use p0 only: +explain partitions select * from t1 where recdate < '2007-03-08 00:00:00'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where +drop table t1; +CREATE TABLE t1 ( recdate DATETIME NOT NULL ) +PARTITION BY RANGE( YEAR(recdate) ) ( +PARTITION p0 VALUES LESS THAN (2006), +PARTITION p1 VALUES LESS THAN (2007) +); +INSERT INTO t1 VALUES ('2005-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2005-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2006-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2006-03-01 12:00:00'); +must use p0 only: +explain partitions select * from t1 where recdate < '2006-01-01 00:00:00'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where +drop table t1; diff --git a/mysql-test/t/partition_pruning.test b/mysql-test/t/partition_pruning.test index 12951c9232a..31008d2b011 100644 --- a/mysql-test/t/partition_pruning.test +++ b/mysql-test/t/partition_pruning.test @@ -761,3 +761,34 @@ insert into t1 values (-15),(-5),(5),(15),(-15),(-5),(5),(15); explain partitions select * from t1 where a>-2 and a <=0; drop table t1; + +# +# BUG#27927 Partition pruning not optimal with TO_DAYS function +# + +CREATE TABLE t1 ( recdate DATETIME NOT NULL ) +PARTITION BY RANGE( TO_DAYS(recdate) ) ( + PARTITION p0 VALUES LESS THAN ( TO_DAYS('2007-03-08') ), + PARTITION p1 VALUES LESS THAN ( TO_DAYS('2007-04-01') ) +); +INSERT INTO t1 VALUES ('2007-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-07 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-08 12:00:00'); +INSERT INTO t1 VALUES ('2007-03-15 12:00:00'); +-- echo must use p0 only: +explain partitions select * from t1 where recdate < '2007-03-08 00:00:00'; + +drop table t1; +CREATE TABLE t1 ( recdate DATETIME NOT NULL ) +PARTITION BY RANGE( YEAR(recdate) ) ( + PARTITION p0 VALUES LESS THAN (2006), + PARTITION p1 VALUES LESS THAN (2007) +); +INSERT INTO t1 VALUES ('2005-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2005-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2006-03-01 12:00:00'); +INSERT INTO t1 VALUES ('2006-03-01 12:00:00'); + +-- echo must use p0 only: +explain partitions select * from t1 where recdate < '2006-01-01 00:00:00'; +drop table t1; diff --git a/sql/item.cc b/sql/item.cc index d98a37e3c32..0076fe94bbd 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2069,6 +2069,11 @@ Item *Item_field::get_tmp_table_item(THD *thd) return new_item; } +longlong Item_field::val_int_endpoint(bool left_endp, bool *incl_endp) +{ + longlong res= val_int(); + return null_value? LONGLONG_MIN : res; +} /* Create an item from a string we KNOW points to a valid longlong diff --git a/sql/item.h b/sql/item.h index 21c4560c6bc..9400399e177 100644 --- a/sql/item.h +++ b/sql/item.h @@ -569,6 +569,43 @@ public: virtual enum_monotonicity_info get_monotonicity_info() const { return NON_MONOTONIC; } + /* + Convert "func_arg $CMP$ const" half-interval into "FUNC(func_arg) $CMP2$ const2" + + SYNOPSIS + val_int_endpoint() + left_endp FALSE <=> The interval is "x < const" or "x <= const" + TRUE <=> The interval is "x > const" or "x >= const" + + incl_endp IN TRUE <=> the comparison is '<' or '>' + FALSE <=> the comparison is '<=' or '>=' + OUT The same but for the "F(x) $CMP$ F(const)" comparison + + DESCRIPTION + This function is defined only for unary monotonic functions. The caller + supplies the source half-interval + + x $CMP$ const + + The value of const is supplied implicitly as the value this item's + argument, the form of $CMP$ comparison is specified through the + function's arguments. The calle returns the result interval + + F(x) $CMP2$ F(const) + + passing back F(const) as the return value, and the form of $CMP2$ + through the out parameter. NULL values are assumed to be comparable and + be less than any non-NULL values. + + RETURN + The output range bound, which equal to the value of val_int() + - If the value of the function is NULL then the bound is the + smallest possible value of LONGLONG_MIN + */ + virtual longlong val_int_endpoint(bool left_endp, bool *incl_endp) + { DBUG_ASSERT(0); } + + /* valXXX methods must return NULL or 0 or 0.0 if null_value is set. */ /* Return double precision floating point representation of item. @@ -1401,6 +1438,7 @@ public: { return MONOTONIC_STRICT_INCREASING; } + longlong val_int_endpoint(bool left_endp, bool *incl_endp); Field *get_tmp_table_field() { return result_field; } Field *tmp_table_field(TABLE *t_arg) { return result_field; } bool get_date(MYSQL_TIME *ltime,uint fuzzydate); diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 2fedb4b72b6..300d0788f55 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -962,6 +962,44 @@ enum_monotonicity_info Item_func_to_days::get_monotonicity_info() const } +longlong Item_func_to_days::val_int_endpoint(bool left_endp, bool *incl_endp) +{ + DBUG_ASSERT(fixed == 1); + MYSQL_TIME ltime; + longlong res; + if (get_arg0_date(<ime, TIME_NO_ZERO_DATE)) + { + /* got NULL, leave the incl_endp intact */ + return LONGLONG_MIN; + } + res=(longlong) calc_daynr(ltime.year,ltime.month,ltime.day); + + if (args[0]->field_type() == MYSQL_TYPE_DATE) + { + // TO_DAYS() is strictly monotonic for dates, leave incl_endp intact + return res; + } + + /* + Handle the special but practically useful case of datetime values that + point to day bound ("strictly less" comparison stays intact): + + col < '2007-09-15 00:00:00' -> TO_DAYS(col) < TO_DAYS('2007-09-15') + + which is different from the general case ("strictly less" changes to + "less or equal"): + + col < '2007-09-15 12:34:56' -> TO_DAYS(col) <= TO_DAYS('2007-09-15') + */ + if (!left_endp && !(ltime.hour || ltime.minute || ltime.second || + ltime.second_part)) + ; /* do nothing */ + else + *incl_endp= TRUE; + return res; +} + + longlong Item_func_dayofyear::val_int() { DBUG_ASSERT(fixed == 1); @@ -1152,7 +1190,7 @@ longlong Item_func_year::val_int() Get information about this Item tree monotonicity SYNOPSIS - Item_func_to_days::get_monotonicity_info() + Item_func_year::get_monotonicity_info() DESCRIPTION Get information about monotonicity of the function represented by this item @@ -1171,6 +1209,37 @@ enum_monotonicity_info Item_func_year::get_monotonicity_info() const return NON_MONOTONIC; } + +longlong Item_func_year::val_int_endpoint(bool left_endp, bool *incl_endp) +{ + DBUG_ASSERT(fixed == 1); + MYSQL_TIME ltime; + if (get_arg0_date(<ime, TIME_FUZZY_DATE)) + { + /* got NULL, leave the incl_endp intact */ + return LONGLONG_MIN; + } + + /* + Handle the special but practically useful case of datetime values that + point to year bound ("strictly less" comparison stays intact) : + + col < '2007-01-01 00:00:00' -> YEAR(col) < 2007 + + which is different from the general case ("strictly less" changes to + "less or equal"): + + col < '2007-09-15 23:00:00' -> YEAR(col) <= 2007 + */ + if (!left_endp && ltime.day == 1 && ltime.month == 1 && + !(ltime.hour || ltime.minute || ltime.second || ltime.second_part)) + ; /* do nothing */ + else + *incl_endp= TRUE; + return ltime.year; +} + + longlong Item_func_unix_timestamp::val_int() { MYSQL_TIME ltime; diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 32f5bcf8e52..36556269d07 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -68,6 +68,7 @@ public: maybe_null=1; } enum_monotonicity_info get_monotonicity_info() const; + longlong val_int_endpoint(bool left_endp, bool *incl_endp); bool check_partition_func_processor(uchar *int_arg) {return FALSE;} }; @@ -248,6 +249,7 @@ public: longlong val_int(); const char *func_name() const { return "year"; } enum_monotonicity_info get_monotonicity_info() const; + longlong val_int_endpoint(bool left_endp, bool *incl_endp); void fix_length_and_dec() { decimals=0; diff --git a/sql/partition_info.h b/sql/partition_info.h index 10edea074c0..b7d13e188f3 100644 --- a/sql/partition_info.h +++ b/sql/partition_info.h @@ -139,20 +139,6 @@ public: */ get_partitions_in_range_iter get_subpart_iter_for_interval; - /* - Valid iff - get_part_iter_for_interval=get_part_iter_for_interval_via_walking: - controls how we'll process "field < C" and "field > C" intervals. - If the partitioning function F is strictly increasing, then for any x, y - "x < y" => "F(x) < F(y)" (*), i.e. when we get interval "field < C" - we can perform partition pruning on the equivalent "F(field) < F(C)". - - If the partitioning function not strictly increasing (it is simply - increasing), then instead of (*) we get "x < y" => "F(x) <= F(y)" - i.e. for interval "field < C" we can perform partition pruning for - "F(field) <= F(C)". - */ - bool range_analysis_include_bounds; /******************************************** * INTERVAL ANALYSIS ENDS ********************************************/ diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc index 5cfb46a99ab..8a8a03cb4e4 100644 --- a/sql/sql_partition.cc +++ b/sql/sql_partition.cc @@ -2743,7 +2743,8 @@ uint32 get_list_array_idx_for_endpoint(partition_info *part_info, uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; longlong list_value; /* Get the partitioning function value for the endpoint */ - longlong part_func_value= part_val_int(part_info->part_expr); + longlong part_func_value= + part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint); bool unsigned_flag= part_info->part_expr->unsigned_flag; DBUG_ENTER("get_list_array_idx_for_endpoint"); @@ -2887,7 +2888,9 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, uint max_partition= part_info->no_parts - 1; uint min_part_id= 0, max_part_id= max_partition, loc_part_id; /* Get the partitioning function value for the endpoint */ - longlong part_func_value= part_val_int(part_info->part_expr); + longlong part_func_value= + part_info->part_expr->val_int_endpoint(left_endpoint, &include_endpoint); + bool unsigned_flag= part_info->part_expr->unsigned_flag; DBUG_ENTER("get_partition_id_range_for_endpoint"); @@ -6590,8 +6593,6 @@ void make_used_partitions_str(partition_info *part_info, String *parts_str) #ifdef WITH_PARTITION_STORAGE_ENGINE static void set_up_range_analysis_info(partition_info *part_info) { - enum_monotonicity_info minfo; - /* Set the catch-all default */ part_info->get_part_iter_for_interval= NULL; part_info->get_subpart_iter_for_interval= NULL; @@ -6603,11 +6604,8 @@ static void set_up_range_analysis_info(partition_info *part_info) switch (part_info->part_type) { case RANGE_PARTITION: case LIST_PARTITION: - minfo= part_info->part_expr->get_monotonicity_info(); - if (minfo != NON_MONOTONIC) + if (part_info->part_expr->get_monotonicity_info() != NON_MONOTONIC) { - part_info->range_analysis_include_bounds= - test(minfo == MONOTONIC_INCREASING); part_info->get_part_iter_for_interval= get_part_iter_for_interval_via_mapping; goto setup_subparts; @@ -6775,8 +6773,7 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info, index-in-ordered-array-of-list-constants (for LIST) space. */ store_key_image_to_rec(field, min_value, field_len); - bool include_endp= part_info->range_analysis_include_bounds || - !test(flags & NEAR_MIN); + bool include_endp= !test(flags & NEAR_MIN); part_iter->part_nums.start= get_endpoint(part_info, 1, include_endp); part_iter->part_nums.cur= part_iter->part_nums.start; if (part_iter->part_nums.start == max_endpoint_val) @@ -6790,8 +6787,7 @@ int get_part_iter_for_interval_via_mapping(partition_info *part_info, else { store_key_image_to_rec(field, max_value, field_len); - bool include_endp= part_info->range_analysis_include_bounds || - !test(flags & NEAR_MAX); + bool include_endp= !test(flags & NEAR_MAX); part_iter->part_nums.end= get_endpoint(part_info, 0, include_endp); if (part_iter->part_nums.start == part_iter->part_nums.end && !part_iter->ret_null_part)