From 1c5200f67d3a5669b987979227d5e314082b00db Mon Sep 17 00:00:00 2001 From: Martin Hansson Date: Mon, 10 May 2010 09:23:23 +0200 Subject: [PATCH] Bug#50939: Loose Index Scan unduly relies on engine to remember range endpoints The Loose Index Scan optimization keeps track of a sequence of intervals. For the current interval it maintains the current interval's endpoints. But the maximum endpoint was not stored in the SQL layer; rather, it relied on the storage engine to retain this value in-between reads. By coincidence this holds for MyISAM and InnoDB. Not for the partitioning engine, however. Fixed by making the key values iterator (QUICK_RANGE_SELECT) keep track of the current maximum endpoint. This is also more efficient as we save a call through the handler API in case of open-ended intervals. The code to calculate endpoints was extracted into separate methods in QUICK_RANGE_SELECT, and it was possible to get rid of some code duplication as part of fix. --- mysql-test/r/range.result | 44 ++++++++++++++++++ mysql-test/t/range.test | 41 +++++++++++++++++ sql/opt_range.cc | 93 ++++++++++++++------------------------- sql/opt_range.h | 83 +++++++++++++++++++++++++++++++++- 4 files changed, 199 insertions(+), 62 deletions(-) diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result index 64e00521cd2..ad8aec7383a 100644 --- a/mysql-test/r/range.result +++ b/mysql-test/r/range.result @@ -1653,4 +1653,48 @@ a b 0 0 1 1 DROP TABLE t1; +# +# Bug#50939: Loose Index Scan unduly relies on engine to remember range +# endpoints +# +CREATE TABLE t1 ( +a INT, +b INT, +KEY ( a, b ) +) PARTITION BY HASH (a) PARTITIONS 1; +CREATE TABLE t2 ( +a INT, +b INT, +KEY ( a, b ) +); +INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); +INSERT INTO t1 SELECT a + 5, b + 5 FROM t1; +INSERT INTO t1 SELECT a + 10, b + 10 FROM t1; +INSERT INTO t1 SELECT a + 20, b + 20 FROM t1; +INSERT INTO t1 SELECT a + 40, b + 40 FROM t1; +INSERT INTO t2 SELECT * FROM t1; +# plans should be identical +EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10,100) GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 1 Using where; Using index for group-by +EXPLAIN SELECT a, MAX(b) FROM t2 WHERE a IN (10,100) GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 range a a 5 NULL 2 Using where; Using index for group-by +FLUSH status; +SELECT a, MAX(b) FROM t1 WHERE a IN (10, 100) GROUP BY a; +a MAX(b) +10 10 +# Should be no more than 4 reads. +SHOW status LIKE 'handler_read_key'; +Variable_name Value +Handler_read_key 4 +FLUSH status; +SELECT a, MAX(b) FROM t2 WHERE a IN (10, 100) GROUP BY a; +a MAX(b) +10 10 +# Should be no more than 4 reads. +SHOW status LIKE 'handler_read_key'; +Variable_name Value +Handler_read_key 4 +DROP TABLE t1, t2; End of 5.1 tests diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test index 5d5ad180f1a..2d2a6f75d73 100644 --- a/mysql-test/t/range.test +++ b/mysql-test/t/range.test @@ -1313,4 +1313,45 @@ SELECT * FROM t1 FORCE INDEX (PRIMARY) DROP TABLE t1; +--echo # +--echo # Bug#50939: Loose Index Scan unduly relies on engine to remember range +--echo # endpoints +--echo # +CREATE TABLE t1 ( + a INT, + b INT, + KEY ( a, b ) +) PARTITION BY HASH (a) PARTITIONS 1; + +CREATE TABLE t2 ( + a INT, + b INT, + KEY ( a, b ) +); + +INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5); + +INSERT INTO t1 SELECT a + 5, b + 5 FROM t1; +INSERT INTO t1 SELECT a + 10, b + 10 FROM t1; +INSERT INTO t1 SELECT a + 20, b + 20 FROM t1; +INSERT INTO t1 SELECT a + 40, b + 40 FROM t1; + +INSERT INTO t2 SELECT * FROM t1; + +--echo # plans should be identical +EXPLAIN SELECT a, MAX(b) FROM t1 WHERE a IN (10,100) GROUP BY a; +EXPLAIN SELECT a, MAX(b) FROM t2 WHERE a IN (10,100) GROUP BY a; + +FLUSH status; +SELECT a, MAX(b) FROM t1 WHERE a IN (10, 100) GROUP BY a; +--echo # Should be no more than 4 reads. +SHOW status LIKE 'handler_read_key'; + +FLUSH status; +SELECT a, MAX(b) FROM t2 WHERE a IN (10, 100) GROUP BY a; +--echo # Should be no more than 4 reads. +SHOW status LIKE 'handler_read_key'; + +DROP TABLE t1, t2; + --echo End of 5.1 tests diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 34757a44c2f..5c6cb64c04f 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8532,8 +8532,6 @@ int QUICK_RANGE_SELECT::get_next() { int result; KEY_MULTI_RANGE *mrange; - key_range *start_key; - key_range *end_key; DBUG_ENTER("QUICK_RANGE_SELECT::get_next"); DBUG_ASSERT(multi_range_length && multi_range && (cur_range >= (QUICK_RANGE**) ranges.buffer) && @@ -8573,26 +8571,9 @@ int QUICK_RANGE_SELECT::get_next() mrange_slot < mrange_end; mrange_slot++) { - start_key= &mrange_slot->start_key; - end_key= &mrange_slot->end_key; last_range= *(cur_range++); - - start_key->key= (const uchar*) last_range->min_key; - start_key->length= last_range->min_length; - start_key->flag= ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY : - (last_range->flag & EQ_RANGE) ? - HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT); - start_key->keypart_map= last_range->min_keypart_map; - end_key->key= (const uchar*) last_range->max_key; - end_key->length= last_range->max_length; - /* - We use HA_READ_AFTER_KEY here because if we are reading on a key - prefix. We want to find all keys with this prefix. - */ - end_key->flag= (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY : - HA_READ_AFTER_KEY); - end_key->keypart_map= last_range->max_keypart_map; - + last_range->make_min_endpoint(&mrange_slot->start_key); + last_range->make_max_endpoint(&mrange_slot->end_key); mrange_slot->range_flag= last_range->flag; } @@ -8616,49 +8597,52 @@ end: /* Get the next record with a different prefix. - SYNOPSIS - QUICK_RANGE_SELECT::get_next_prefix() - prefix_length length of cur_prefix - cur_prefix prefix of a key to be searched for + @param prefix_length length of cur_prefix + @param group_key_parts The number of key parts in the group prefix + @param cur_prefix prefix of a key to be searched for - DESCRIPTION - Each subsequent call to the method retrieves the first record that has a - prefix with length prefix_length different from cur_prefix, such that the - record with the new prefix is within the ranges described by - this->ranges. The record found is stored into the buffer pointed by - this->record. - The method is useful for GROUP-BY queries with range conditions to - discover the prefix of the next group that satisfies the range conditions. + Each subsequent call to the method retrieves the first record that has a + prefix with length prefix_length and which is different from cur_prefix, + such that the record with the new prefix is within the ranges described by + this->ranges. The record found is stored into the buffer pointed by + this->record. The method is useful for GROUP-BY queries with range + conditions to discover the prefix of the next group that satisfies the range + conditions. + + @todo - TODO This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both methods should be unified into a more general one to reduce code duplication. - RETURN - 0 on success - HA_ERR_END_OF_FILE if returned all keys - other if some error occurred + @retval 0 on success + @retval HA_ERR_END_OF_FILE if returned all keys + @retval other if some error occurred */ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, - key_part_map keypart_map, + uint group_key_parts, uchar *cur_prefix) { DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix"); + const key_part_map keypart_map= make_prev_keypart_map(group_key_parts); for (;;) { int result; - key_range start_key, end_key; if (last_range) { /* Read the next record in the same range with prefix after cur_prefix. */ - DBUG_ASSERT(cur_prefix != 0); + DBUG_ASSERT(cur_prefix != NULL); result= file->index_read_map(record, cur_prefix, keypart_map, HA_READ_AFTER_KEY); - if (result || (file->compare_key(file->end_range) <= 0)) + if (result || last_range->max_keypart_map == 0) DBUG_RETURN(result); + + key_range previous_endpoint; + last_range->make_max_endpoint(&previous_endpoint, prefix_length, keypart_map); + if (file->compare_key(&previous_endpoint) <= 0) + DBUG_RETURN(0); } uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer); @@ -8670,21 +8654,9 @@ int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, } last_range= *(cur_range++); - start_key.key= (const uchar*) last_range->min_key; - start_key.length= min(last_range->min_length, prefix_length); - start_key.keypart_map= last_range->min_keypart_map & keypart_map; - start_key.flag= ((last_range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY : - (last_range->flag & EQ_RANGE) ? - HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT); - end_key.key= (const uchar*) last_range->max_key; - end_key.length= min(last_range->max_length, prefix_length); - end_key.keypart_map= last_range->max_keypart_map & keypart_map; - /* - We use READ_AFTER_KEY here because if we are reading on a key - prefix we want to find all keys with this prefix - */ - end_key.flag= (last_range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY : - HA_READ_AFTER_KEY); + key_range start_key, end_key; + last_range->make_min_endpoint(&start_key, prefix_length, keypart_map); + last_range->make_max_endpoint(&end_key, prefix_length, keypart_map); result= file->read_range_first(last_range->min_keypart_map ? &start_key : 0, last_range->max_keypart_map ? &end_key : 0, @@ -8779,9 +8751,9 @@ bool QUICK_RANGE_SELECT::row_in_ranges() } /* - This is a hack: we inherit from QUICK_SELECT so that we can use the + This is a hack: we inherit from QUICK_RANGE_SELECT so that we can use the get_next() interface, but we have to hold a pointer to the original - QUICK_SELECT because its data are used all over the place. What + QUICK_RANGE_SELECT because its data are used all over the place. What should be done is to factor out the data that is needed into a base class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC) which handle the ranges and implement the get_next() function. But @@ -10903,7 +10875,8 @@ int QUICK_GROUP_MIN_MAX_SELECT::next_prefix() { uchar *cur_prefix= seen_first_key ? group_prefix : NULL; if ((result= quick_prefix_select->get_next_prefix(group_prefix_len, - make_prev_keypart_map(group_key_parts), cur_prefix))) + group_key_parts, + cur_prefix))) DBUG_RETURN(result); seen_first_key= TRUE; } diff --git a/sql/opt_range.h b/sql/opt_range.h index 8d2ba1bb0a6..e7d8297faf8 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -65,6 +65,85 @@ class QUICK_RANGE :public Sql_alloc { dummy=0; #endif } + + /** + Initalizes a key_range object for communication with storage engine. + + This function facilitates communication with the Storage Engine API by + translating the minimum endpoint of the interval represented by this + QUICK_RANGE into an index range endpoint specifier for the engine. + + @param Pointer to an uninitialized key_range C struct. + + @param prefix_length The length of the search key prefix to be used for + lookup. + + @param keypart_map A set (bitmap) of keyparts to be used. + */ + void make_min_endpoint(key_range *kr, uint prefix_length, + key_part_map keypart_map) { + make_min_endpoint(kr); + kr->length= min(kr->length, prefix_length); + kr->keypart_map&= keypart_map; + } + + /** + Initalizes a key_range object for communication with storage engine. + + This function facilitates communication with the Storage Engine API by + translating the minimum endpoint of the interval represented by this + QUICK_RANGE into an index range endpoint specifier for the engine. + + @param Pointer to an uninitialized key_range C struct. + */ + void make_min_endpoint(key_range *kr) { + kr->key= (const uchar*)min_key; + kr->length= min_length; + kr->keypart_map= min_keypart_map; + kr->flag= ((flag & NEAR_MIN) ? HA_READ_AFTER_KEY : + (flag & EQ_RANGE) ? HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT); + } + + /** + Initalizes a key_range object for communication with storage engine. + + This function facilitates communication with the Storage Engine API by + translating the maximum endpoint of the interval represented by this + QUICK_RANGE into an index range endpoint specifier for the engine. + + @param Pointer to an uninitialized key_range C struct. + + @param prefix_length The length of the search key prefix to be used for + lookup. + + @param keypart_map A set (bitmap) of keyparts to be used. + */ + void make_max_endpoint(key_range *kr, uint prefix_length, + key_part_map keypart_map) { + make_max_endpoint(kr); + kr->length= min(kr->length, prefix_length); + kr->keypart_map&= keypart_map; + } + + /** + Initalizes a key_range object for communication with storage engine. + + This function facilitates communication with the Storage Engine API by + translating the maximum endpoint of the interval represented by this + QUICK_RANGE into an index range endpoint specifier for the engine. + + @param Pointer to an uninitialized key_range C struct. + */ + void make_max_endpoint(key_range *kr) { + kr->key= (const uchar*)max_key; + kr->length= max_length; + kr->keypart_map= max_keypart_map; + /* + We use READ_AFTER_KEY here because if we are reading on a key + prefix we want to find all keys with this prefix + */ + kr->flag= (flag & NEAR_MAX ? HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY); + } }; @@ -331,7 +410,7 @@ public: int reset(void); int get_next(); void range_end(); - int get_next_prefix(uint prefix_length, key_part_map keypart_map, + int get_next_prefix(uint prefix_length, uint group_key_parts, uchar *cur_prefix); bool reverse_sorted() { return 0; } bool unique_key_range(); @@ -611,7 +690,7 @@ private: uchar *record; /* Buffer where the next record is returned. */ uchar *tmp_record; /* Temporary storage for next_min(), next_max(). */ uchar *group_prefix; /* Key prefix consisting of the GROUP fields. */ - uint group_prefix_len; /* Length of the group prefix. */ + const uint group_prefix_len; /* Length of the group prefix. */ uint group_key_parts; /* A number of keyparts in the group prefix */ uchar *last_prefix; /* Prefix of the last group for detecting EOF. */ bool have_min; /* Specify whether we are computing */