mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
DS-MRR improvements: address review feedback for R3 version of the patch
This commit is contained in:
parent
ac8a79b944
commit
d8efc3b155
3 changed files with 882 additions and 598 deletions
|
@ -2177,7 +2177,8 @@ public:
|
|||
TRUE if the engine supports virtual columns
|
||||
*/
|
||||
virtual bool check_if_supported_virtual_columns(void) { return FALSE;}
|
||||
|
||||
|
||||
TABLE* get_table() { return table; }
|
||||
protected:
|
||||
/* deprecated, don't use in new engines */
|
||||
inline void ha_statistic_increment(ulong SSV::*offset) const { }
|
||||
|
@ -2370,7 +2371,6 @@ private:
|
|||
virtual int rename_partitions(const char *path)
|
||||
{ return HA_ERR_WRONG_COMMAND; }
|
||||
friend class ha_partition;
|
||||
friend class DsMrr_impl;
|
||||
public:
|
||||
/* XXX to be removed, see ha_partition::partition_ht() */
|
||||
virtual handlerton *partition_ht() const
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -50,6 +50,26 @@
|
|||
|
||||
class DsMrr_impl;
|
||||
|
||||
class Key_parameters
|
||||
{
|
||||
public:
|
||||
/* TRUE <=> We can get at most one index tuple for a lookup key */
|
||||
bool index_ranges_unique;
|
||||
|
||||
uint key_tuple_length; /* Length of index lookup tuple, in bytes */
|
||||
key_part_map key_tuple_map; /* keyparts used in index lookup tuples */
|
||||
|
||||
/*
|
||||
This is
|
||||
= key_tuple_length if we copy keys to buffer
|
||||
= sizeof(void*) if we're using pointers to materialized keys.
|
||||
*/
|
||||
uint key_size_in_keybuf;
|
||||
|
||||
/* TRUE <=> don't copy key values, use pointers to them instead. */
|
||||
bool use_key_pointers;
|
||||
};
|
||||
|
||||
/**
|
||||
Iterator over (record, range_id) pairs that match given key value.
|
||||
|
||||
|
@ -57,16 +77,23 @@ class DsMrr_impl;
|
|||
key value. A key value may have multiple matching records, so we'll need to
|
||||
produce a cross-product of sets of matching records and range_id-s.
|
||||
*/
|
||||
|
||||
class Mrr_ordered_index_reader;
|
||||
class Key_value_records_iterator
|
||||
{
|
||||
/* Scan parameters */
|
||||
DsMrr_impl *dsmrr;
|
||||
Key_parameters *param;
|
||||
Lifo_buffer_iterator identical_key_it;
|
||||
uchar *last_identical_key_ptr;
|
||||
bool get_next_row;
|
||||
//handler *h;
|
||||
/* TRUE <=> We can get at most one index tuple for a lookup key */
|
||||
//bool index_ranges_unique;
|
||||
|
||||
Mrr_ordered_index_reader *owner;
|
||||
/* key_buffer.read() reads to here */
|
||||
uchar *cur_index_tuple;
|
||||
public:
|
||||
bool init(DsMrr_impl *dsmrr);
|
||||
bool init(Mrr_ordered_index_reader *owner_arg);
|
||||
|
||||
/*
|
||||
Get next (key_val, range_id) pair.
|
||||
|
@ -74,9 +101,184 @@ public:
|
|||
int get_next();
|
||||
|
||||
void close();
|
||||
friend class Mrr_ordered_index_reader;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Something that will manage buffers for those that call it
|
||||
*/
|
||||
class Buffer_manager
|
||||
{
|
||||
public:
|
||||
virtual void reset_buffer_sizes()= 0;
|
||||
virtual void setup_buffer_sizes(uint key_size_in_keybuf,
|
||||
key_part_map key_tuple_map)=0;
|
||||
virtual Lifo_buffer* get_key_buffer()= 0;
|
||||
virtual ~Buffer_manager(){}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
Abstract MRR execution strategy
|
||||
|
||||
An object of this class produces (R, range_info) pairs where R can be an
|
||||
index tuple or a table record.
|
||||
|
||||
Getting HA_ERR_END_OF_FILE from get_next() means that the source should be
|
||||
re-filled. if eof() returns true after refill attempt, then end of stream has
|
||||
been reached and get_next() must not be called anymore.
|
||||
*/
|
||||
|
||||
class Mrr_strategy
|
||||
{
|
||||
public:
|
||||
virtual int get_next(char **range_info) = 0;
|
||||
virtual int refill_buffer()=0;
|
||||
|
||||
virtual ~Mrr_strategy() {};
|
||||
};
|
||||
|
||||
|
||||
/* A common base for strategies that do index scans and produce index tuples */
|
||||
class Mrr_index_reader : public Mrr_strategy
|
||||
{
|
||||
public:
|
||||
handler *h;
|
||||
|
||||
virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
||||
void *seq_init_param, uint n_ranges,
|
||||
uint mode, Buffer_manager *buf_manager_arg) = 0;
|
||||
virtual bool eof() = 0;
|
||||
virtual uchar *get_rowid_ptr()= 0;
|
||||
virtual bool skip_record(char *range_id, uchar *rowid)=0;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
A "bypass" strategy that uses default MRR implementation (i.e.
|
||||
handler::multi_range_read_XXX() calls) to produce rows.
|
||||
*/
|
||||
|
||||
class Mrr_simple_index_reader : public Mrr_index_reader
|
||||
{
|
||||
int res;
|
||||
public:
|
||||
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
||||
void *seq_init_param, uint n_ranges,
|
||||
uint mode, Buffer_manager *buf_manager_arg);
|
||||
int get_next(char **range_info);
|
||||
int refill_buffer() { return 0; }
|
||||
bool eof() { return test(res); }
|
||||
uchar *get_rowid_ptr() { return h->ref; }
|
||||
bool skip_record(char *range_id, uchar *rowid)
|
||||
{
|
||||
return (h->mrr_funcs.skip_record &&
|
||||
h->mrr_funcs.skip_record(h->mrr_iter, range_id, rowid));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
A strategy that sorts index lookup keys before scanning the index
|
||||
*/
|
||||
|
||||
class Mrr_ordered_index_reader : public Mrr_index_reader
|
||||
{
|
||||
public:
|
||||
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
||||
void *seq_init_param, uint n_ranges,
|
||||
uint mode, Buffer_manager *buf_manager_arg);
|
||||
int get_next(char **range_info);
|
||||
int refill_buffer();
|
||||
bool eof() { return index_scan_eof; }
|
||||
uchar *get_rowid_ptr() { return h->ref; }
|
||||
|
||||
bool skip_record(char *range_info, uchar *rowid)
|
||||
{
|
||||
return (mrr_funcs.skip_record &&
|
||||
mrr_funcs.skip_record(mrr_iter, range_info, rowid));
|
||||
}
|
||||
private:
|
||||
Key_value_records_iterator kv_it;
|
||||
|
||||
bool scanning_key_val_iter;
|
||||
|
||||
char *cur_range_info;
|
||||
|
||||
/* Buffer to store (key, range_id) pairs */
|
||||
Lifo_buffer *key_buffer;
|
||||
|
||||
Buffer_manager *buf_manager;
|
||||
|
||||
/* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */
|
||||
bool know_key_tuple_params;
|
||||
|
||||
// bool use_key_pointers;
|
||||
|
||||
Key_parameters keypar;
|
||||
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
||||
bool is_mrr_assoc;
|
||||
|
||||
bool no_more_keys;
|
||||
RANGE_SEQ_IF mrr_funcs;
|
||||
range_seq_t mrr_iter;
|
||||
|
||||
bool auto_refill;
|
||||
|
||||
bool index_scan_eof;
|
||||
|
||||
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
|
||||
static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2);
|
||||
//void cleanup();
|
||||
|
||||
friend class Key_value_records_iterator;
|
||||
friend class DsMrr_impl;
|
||||
friend class Mrr_ordered_rndpos_reader;
|
||||
};
|
||||
|
||||
|
||||
/* MRR strategy that fetches rowids */
|
||||
|
||||
class Mrr_ordered_rndpos_reader : public Mrr_strategy
|
||||
{
|
||||
public:
|
||||
int init(handler *h, Mrr_index_reader *index_reader, uint mode,
|
||||
Lifo_buffer *buf);
|
||||
int get_next(char **range_info);
|
||||
int refill_buffer();
|
||||
void cleanup();
|
||||
private:
|
||||
handler *h;
|
||||
|
||||
DsMrr_impl *dsmrr;
|
||||
/* This what we get (rowid, range_info) pairs from */
|
||||
Mrr_index_reader *index_reader;
|
||||
uchar *index_rowid;
|
||||
|
||||
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
||||
bool is_mrr_assoc;
|
||||
|
||||
uchar *last_identical_rowid;
|
||||
Lifo_buffer *rowid_buffer;
|
||||
|
||||
/* = h->ref_length [ + sizeof(range_assoc_info) ] */
|
||||
//uint rowid_buff_elem_size;
|
||||
|
||||
/* rowid_buffer.read() will set the following: */
|
||||
uchar *rowid;
|
||||
uchar *rowids_range_id;
|
||||
};
|
||||
|
||||
class Mrr_strategy_factory
|
||||
{
|
||||
public:
|
||||
Mrr_ordered_rndpos_reader ordered_rndpos_reader;
|
||||
Mrr_ordered_index_reader ordered_index_reader;
|
||||
Mrr_simple_index_reader simple_index_reader;
|
||||
};
|
||||
|
||||
/*
|
||||
DS-MRR implementation for one table. Create/use one object of this class for
|
||||
each ha_{myisam/innobase/etc} object. That object will be further referred to
|
||||
|
@ -154,9 +356,58 @@ public:
|
|||
get record by rowid and return the {record, range_id} pair
|
||||
4. Repeat the above steps until we've exhausted the list of ranges we're
|
||||
scanning.
|
||||
|
||||
Buffer space management considerations
|
||||
--------------------------------------
|
||||
With regards to buffer/memory management, MRR interface specifies that
|
||||
- SQL layer provides multi_range_read_init() with buffer of certain size.
|
||||
- MRR implementation may use (i.e. have at its disposal till the end of
|
||||
the MRR scan) all of the buffer, or return the unused end of the buffer
|
||||
to SQL layer.
|
||||
|
||||
DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
|
||||
we need to accumulate/sort only keys (or only rowids), it is fairly trivial.
|
||||
|
||||
When we need to accumulate/sort both keys and rowids, efficient buffer use
|
||||
gets complicated. We need to:
|
||||
- First, accumulate keys and sort them
|
||||
- Then use the keys (smaller values go first) to obtain rowids. A key is not
|
||||
needed after we've got matching rowids for it.
|
||||
- Make sure that rowids are accumulated at the front of the buffer, so that we
|
||||
can return the end part of the buffer to SQL layer, should there be too
|
||||
few rowid values to occupy the buffer.
|
||||
|
||||
All of these goals are achieved by using the following scheme:
|
||||
|
||||
| | We get an empty buffer from SQL layer.
|
||||
|
||||
| *-|
|
||||
| *----| First, we fill the buffer with keys. Key_buffer
|
||||
| *-------| part grows from end of the buffer space to start
|
||||
| *----------| (In this picture, the buffer is big enough to
|
||||
| *-------------| accomodate all keys and even have some space left)
|
||||
|
||||
| *=============| We want to do key-ordered index scan, so we sort
|
||||
the keys
|
||||
|
||||
|-x *===========| Then we use the keys get rowids. Rowids are
|
||||
|----x *========| stored from start of buffer space towards the end.
|
||||
|--------x *=====| The part of the buffer occupied with keys
|
||||
|------------x *===| gradually frees up space for rowids. In this
|
||||
|--------------x *=| picture we run out of keys before we've ran out
|
||||
|----------------x | of buffer space (it can be other way as well).
|
||||
|
||||
|================x | Then we sort the rowids.
|
||||
|
||||
| |~~~| The unused part of the buffer is at the end, so
|
||||
we can return it to the SQL layer.
|
||||
|
||||
|================* Sorted rowids are then used to read table records
|
||||
in disk order
|
||||
|
||||
*/
|
||||
|
||||
class DsMrr_impl
|
||||
class DsMrr_impl : public Buffer_manager
|
||||
{
|
||||
public:
|
||||
typedef void (handler::*range_check_toggle_func_t)(bool on);
|
||||
|
@ -181,6 +432,9 @@ public:
|
|||
void *seq_init_param, uint n_ranges, uint *bufsz,
|
||||
uint *flags, COST_VECT *cost);
|
||||
private:
|
||||
/* Buffer to store (key, range_id) pairs */
|
||||
Lifo_buffer *key_buffer;
|
||||
|
||||
/*
|
||||
The "owner" handler object (the one that is expected to "own" this object
|
||||
and call its functions).
|
||||
|
@ -197,20 +451,16 @@ private:
|
|||
/** Properties of current MRR scan **/
|
||||
|
||||
uint keyno; /* index we're running the scan on */
|
||||
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
|
||||
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
||||
bool is_mrr_assoc;
|
||||
/* TRUE <=> sort the keys before making index lookups */
|
||||
bool do_sort_keys;
|
||||
//bool do_sort_keys;
|
||||
/* TRUE <=> sort rowids and use rnd_pos() to get and return full records */
|
||||
bool do_rndpos_scan;
|
||||
|
||||
/*
|
||||
(if do_sort_keys==TRUE) don't copy key values, use pointers to them
|
||||
instead.
|
||||
*/
|
||||
bool use_key_pointers;
|
||||
//bool do_rndpos_scan;
|
||||
|
||||
Mrr_strategy_factory strategy_factory;
|
||||
Mrr_strategy *strategy;
|
||||
Mrr_index_reader *index_strategy;
|
||||
|
||||
/* The whole buffer space that we're using */
|
||||
uchar *full_buf;
|
||||
|
@ -226,12 +476,6 @@ private:
|
|||
|
||||
/** Index scaning and key buffer-related members **/
|
||||
|
||||
/* TRUE <=> We can get at most one index tuple for a lookup key */
|
||||
bool index_ranges_unique;
|
||||
|
||||
/* TRUE<=> we're in a middle of enumerating records for a key range */
|
||||
//bool in_index_range;
|
||||
|
||||
/*
|
||||
One of the following two is used for key buffer: forward is used when
|
||||
we only need key buffer, backward is used when we need both key and rowid
|
||||
|
@ -240,39 +484,10 @@ private:
|
|||
Forward_lifo_buffer forward_key_buf;
|
||||
Backward_lifo_buffer backward_key_buf;
|
||||
|
||||
/* Buffer to store (key, range_id) pairs */
|
||||
Lifo_buffer *key_buffer;
|
||||
|
||||
/* Index scan state */
|
||||
bool scanning_key_val_iter;
|
||||
/*
|
||||
TRUE <=> we've got index tuples/rowids for all keys (need this flag because
|
||||
we may have a situation where we've read everything from the key buffer but
|
||||
haven't finished with getting index tuples for the last key)
|
||||
*/
|
||||
bool index_scan_eof;
|
||||
Key_value_records_iterator kv_it;
|
||||
|
||||
/* key_buffer.read() reads to here */
|
||||
uchar *cur_index_tuple;
|
||||
|
||||
/* if in_index_range==TRUE: range_id of the range we're enumerating */
|
||||
char *cur_range_info;
|
||||
|
||||
/* Initially FALSE, becomes TRUE when we've set key_tuple_xxx members */
|
||||
bool know_key_tuple_params;
|
||||
uint key_tuple_length; /* Length of index lookup tuple, in bytes */
|
||||
key_part_map key_tuple_map; /* keyparts used in index lookup tuples */
|
||||
|
||||
/*
|
||||
This is
|
||||
= key_tuple_length if we copy keys to buffer
|
||||
= sizeof(void*) if we're using pointers to materialized keys.
|
||||
*/
|
||||
uint key_size_in_keybuf;
|
||||
Forward_lifo_buffer rowid_buffer;
|
||||
|
||||
/* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */
|
||||
uint key_buff_elem_size;
|
||||
//uint key_buff_elem_size_;
|
||||
|
||||
/** rnd_pos() scan and rowid buffer-related members **/
|
||||
|
||||
|
@ -280,36 +495,27 @@ private:
|
|||
Buffer to store (rowid, range_id) pairs, or just rowids if
|
||||
is_mrr_assoc==FALSE
|
||||
*/
|
||||
Forward_lifo_buffer rowid_buffer;
|
||||
|
||||
/* rowid_buffer.read() will set the following: */
|
||||
uchar *rowid;
|
||||
uchar *rowids_range_id;
|
||||
|
||||
uchar *last_identical_rowid;
|
||||
|
||||
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
|
||||
|
||||
/* = h->ref_length [ + sizeof(range_assoc_info) ] */
|
||||
uint rowid_buff_elem_size;
|
||||
//Forward_lifo_buffer rowid_buffer;
|
||||
|
||||
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
|
||||
COST_VECT *cost);
|
||||
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
|
||||
uint *buffer_size, COST_VECT *cost);
|
||||
bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags);
|
||||
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
|
||||
static int key_tuple_cmp_reverse(void* arg, uchar* key1, uchar* key2);
|
||||
int dsmrr_fill_rowid_buffer();
|
||||
void dsmrr_fill_key_buffer();
|
||||
int dsmrr_next_from_index(char **range_info);
|
||||
|
||||
void setup_buffer_sizes(key_range *sample_key);
|
||||
void reallocate_buffer_space();
|
||||
|
||||
static range_seq_t key_buf_seq_init(void *init_param, uint n_ranges, uint flags);
|
||||
static uint key_buf_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
|
||||
/* Buffer_manager implementation */
|
||||
void setup_buffer_sizes(uint key_size_in_keybuf, key_part_map key_tuple_map);
|
||||
void reset_buffer_sizes();
|
||||
Lifo_buffer* get_key_buffer() { return key_buffer; }
|
||||
|
||||
friend class Key_value_records_iterator;
|
||||
friend class Mrr_ordered_index_reader;
|
||||
friend class Mrr_ordered_rndpos_reader;
|
||||
|
||||
int setup_two_handlers();
|
||||
void close_second_handler();
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue