2012-02-17 12:19:38 +01:00
|
|
|
/*
|
|
|
|
Copyright (c) 2009, 2011, Monty Program Ab
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
Update FSF address
This commit is based on the work of Michal Schorm, rebased on the
earliest MariaDB version.
Th command line used to generate this diff was:
find ./ -type f \
-exec sed -i -e 's/Foundation, Inc., 59 Temple Place, Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/Foundation, Inc. 59 Temple Place.* Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/MA.*.....-1307.*USA/MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/Foundation, Inc., 59 Temple/Foundation, Inc., 51 Franklin/g' {} \; \
-exec sed -i -e 's/Place, Suite 330, Boston, MA.*02111-1307.*USA/Street, Fifth Floor, Boston, MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/MA.*.....-1307/MA 02110-1335/g' {} \;
2019-05-10 19:49:46 +02:00
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
2012-02-17 12:19:38 +01:00
|
|
|
|
2010-09-28 10:19:50 +02:00
|
|
|
/**
|
|
|
|
@defgroup DS-MRR declarations
|
|
|
|
@{
|
2009-12-15 18:23:55 +01:00
|
|
|
*/
|
2009-12-15 08:16:46 +01:00
|
|
|
|
|
|
|
/**
|
2010-06-22 19:24:22 +02:00
|
|
|
A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
|
2009-12-15 08:16:46 +01:00
|
|
|
|
2010-09-18 23:05:47 +02:00
|
|
|
This is a "plugin"(*) for storage engines that allows to
|
|
|
|
1. When doing index scans, read table rows in rowid order;
|
|
|
|
2. when making many index lookups, do them in key order and don't
|
|
|
|
lookup the same key value multiple times;
|
|
|
|
3. Do both #1 and #2, when applicable.
|
|
|
|
These changes are expected to speed up query execution for disk-based
|
|
|
|
storage engines running io-bound loads and "big" queries (ie. queries that
|
|
|
|
do joins and enumerate lots of records).
|
2010-06-22 19:24:22 +02:00
|
|
|
|
|
|
|
(*) - only conceptually. No dynamic loading or binary compatibility of any
|
|
|
|
kind.
|
|
|
|
|
|
|
|
General scheme of things:
|
|
|
|
|
|
|
|
SQL Layer code
|
|
|
|
| | |
|
2010-09-18 23:05:47 +02:00
|
|
|
v v v
|
|
|
|
-|---|---|---- handler->multi_range_read_XXX() function calls
|
2010-06-22 19:24:22 +02:00
|
|
|
| | |
|
2010-09-18 23:05:47 +02:00
|
|
|
_____________________________________
|
|
|
|
/ DS-MRR module \
|
|
|
|
| (order/de-duplicate lookup keys, |
|
|
|
|
| scan indexes in key order, |
|
|
|
|
| order/de-duplicate rowids, |
|
|
|
|
| retrieve full record reads in rowid |
|
|
|
|
| order) |
|
|
|
|
\_____________________________________/
|
2010-06-22 19:24:22 +02:00
|
|
|
| | |
|
|
|
|
-|---|---|----- handler->read_range_first()/read_range_next(),
|
|
|
|
| | | handler->index_read(), handler->rnd_pos() calls.
|
|
|
|
| | |
|
|
|
|
v v v
|
|
|
|
Storage engine internals
|
2010-09-18 23:05:47 +02:00
|
|
|
|
|
|
|
|
2017-06-21 12:44:16 +02:00
|
|
|
Currently DS-MRR is used by MyISAM, InnoDB and Maria storage engines.
|
2010-06-22 19:24:22 +02:00
|
|
|
Potentially it can be used with any table handler that has disk-based data
|
|
|
|
storage and has better performance when reading data in rowid order.
|
|
|
|
*/
|
|
|
|
|
2010-09-28 10:19:50 +02:00
|
|
|
#include "sql_lifo_buffer.h"
|
2010-09-05 12:32:14 +02:00
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
class DsMrr_impl;
|
2010-11-02 21:25:35 +01:00
|
|
|
class Mrr_ordered_index_reader;
|
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
/* A structure with key parameters that's shared among several classes */
|
2010-10-26 13:35:13 +02:00
|
|
|
class Key_parameters
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
uint key_tuple_length; /* Length of index lookup tuple, in bytes */
|
|
|
|
key_part_map key_tuple_map; /* keyparts used in index lookup tuples */
|
|
|
|
|
|
|
|
/*
|
|
|
|
This is
|
|
|
|
= key_tuple_length if we copy keys to buffer
|
|
|
|
= sizeof(void*) if we're using pointers to materialized keys.
|
|
|
|
*/
|
|
|
|
uint key_size_in_keybuf;
|
|
|
|
|
|
|
|
/* TRUE <=> don't copy key values, use pointers to them instead. */
|
|
|
|
bool use_key_pointers;
|
2010-12-02 12:10:52 +01:00
|
|
|
|
|
|
|
/* TRUE <=> We can get at most one index tuple for a lookup key */
|
|
|
|
bool index_ranges_unique;
|
2010-10-26 13:35:13 +02:00
|
|
|
};
|
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
/**
|
2010-11-02 21:25:35 +01:00
|
|
|
A class to enumerate (record, range_id) pairs that match given key value.
|
2010-10-03 12:48:42 +02:00
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
@note
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
The idea is that we have a Lifo_buffer which holds (key, range_id) pairs
|
|
|
|
ordered by key value. From the front of the buffer we see
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
(key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN)
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
we take the first elements that have the same key value (key_val1 in the
|
|
|
|
example above), and make lookup into the table. The table will have
|
|
|
|
multiple matches for key_val1:
|
|
|
|
|
|
|
|
== Table Index ==
|
|
|
|
...
|
|
|
|
key_val1 -> key_val1, index_tuple1
|
|
|
|
key_val1, index_tuple2
|
|
|
|
...
|
|
|
|
key_val1, index_tupleN
|
|
|
|
...
|
|
|
|
|
|
|
|
Our goal is to produce all possible combinations, i.e. we need:
|
|
|
|
|
|
|
|
{(key_val1, index_tuple1), range_id1}
|
|
|
|
{(key_val1, index_tuple1), range_id2}
|
|
|
|
... ... |
|
|
|
|
{(key_val1, index_tuple1), range_idN},
|
|
|
|
|
|
|
|
{(key_val1, index_tuple2), range_id1}
|
|
|
|
{(key_val1, index_tuple2), range_id2}
|
|
|
|
... ... |
|
|
|
|
{(key_val1, index_tuple2), range_idN},
|
|
|
|
|
|
|
|
... ... ...
|
|
|
|
|
|
|
|
{(key_val1, index_tupleK), range_idN}
|
2010-10-03 12:48:42 +02:00
|
|
|
*/
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
class Key_value_records_iterator
|
|
|
|
{
|
2010-11-02 21:25:35 +01:00
|
|
|
/* Use this to get table handler, key buffer and other parameters */
|
|
|
|
Mrr_ordered_index_reader *owner;
|
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
/* Iterator to get (key, range_id) pairs from */
|
|
|
|
Lifo_buffer_iterator identical_key_it;
|
|
|
|
|
|
|
|
/*
|
|
|
|
Last of the identical key values (when we get this pointer from
|
|
|
|
identical_key_it, it will be time to stop).
|
|
|
|
*/
|
2010-10-03 12:48:42 +02:00
|
|
|
uchar *last_identical_key_ptr;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
FALSE <=> we're right after the init() call, the record has been already
|
2010-12-02 14:21:52 +01:00
|
|
|
read with owner->file->index_read_map() call
|
2010-11-08 13:15:50 +01:00
|
|
|
*/
|
2010-10-03 12:48:42 +02:00
|
|
|
bool get_next_row;
|
2010-10-26 13:35:13 +02:00
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
public:
|
2010-11-08 13:15:50 +01:00
|
|
|
int init(Mrr_ordered_index_reader *owner_arg);
|
2011-03-04 10:06:03 +01:00
|
|
|
int get_next(range_id_t *range_info);
|
2010-11-22 17:34:03 +01:00
|
|
|
void move_to_next_key_value();
|
2010-10-03 12:48:42 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2010-10-26 13:35:13 +02:00
|
|
|
/*
|
2010-11-02 21:25:35 +01:00
|
|
|
Buffer manager interface. Mrr_reader objects use it to ask DsMrr_impl
|
|
|
|
to manage buffer space for them.
|
2010-10-26 13:35:13 +02:00
|
|
|
*/
|
2010-12-02 12:10:52 +01:00
|
|
|
typedef struct st_buffer_manager
{
  /* Opaque value; pass it as the first argument of every callback below */
  void *arg;

  /*
    Invoked after more space has been freed in the rowid buffer. The callee
    takes the unused rowid buffer space and hands it over to the key buffer.
  */
  void (*redistribute_buffer_space)(void *arg);

  /*
    Invoked when the key buffer and the rowid buffer are both empty, so both
    can be brought back to their original sizes (the sizes drift because the
    rowid buffer is grown and the key buffer is shrunk dynamically).
  */
  void (*reset_buffer_sizes)(void *arg);

} Buffer_manager;
|
2010-10-26 13:35:13 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
2010-11-02 22:09:28 +01:00
|
|
|
Mrr_reader - DS-MRR execution strategy abstraction
|
2010-10-26 13:35:13 +02:00
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
A reader produces ([index]_record, range_info) pairs, and requires periodic
|
|
|
|
refill operations.
|
2010-10-31 20:00:15 +01:00
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
- one starts using the reader by calling reader->get_next(),
|
|
|
|
- when a get_next() call returns HA_ERR_END_OF_FILE, one must call
|
|
|
|
refill_buffer() before they can make more get_next() calls.
|
|
|
|
- when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real
|
|
|
|
end of stream and get_next() should not be called anymore.
|
2010-10-31 20:00:15 +01:00
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
Both functions can return other error codes, these mean unrecoverable errors
|
|
|
|
after which one cannot continue.
|
2010-10-26 13:35:13 +02:00
|
|
|
*/
|
|
|
|
|
2010-10-28 18:48:16 +02:00
|
|
|
class Mrr_reader
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
|
|
|
public:
|
2011-03-04 10:06:03 +01:00
|
|
|
virtual int get_next(range_id_t *range_info) = 0;
|
2010-11-25 12:35:21 +01:00
|
|
|
virtual int refill_buffer(bool initial) = 0;
|
2010-10-28 18:48:16 +02:00
|
|
|
virtual ~Mrr_reader() {}; /* just to remove compiler warning */
|
2010-10-26 13:35:13 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2010-11-02 22:09:28 +01:00
|
|
|
/*
|
|
|
|
A common base for readers that do index scans and produce index tuples
|
|
|
|
*/
|
|
|
|
|
2010-10-28 18:48:16 +02:00
|
|
|
class Mrr_index_reader : public Mrr_reader
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
2010-11-02 22:09:28 +01:00
|
|
|
protected:
|
2010-12-02 14:21:52 +01:00
|
|
|
handler *file; /* Handler object to use */
|
2010-10-26 13:35:13 +02:00
|
|
|
public:
|
|
|
|
virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|
|
|
void *seq_init_param, uint n_ranges,
|
2010-12-02 12:10:52 +01:00
|
|
|
uint mode, Key_parameters *key_par,
|
|
|
|
Lifo_buffer *key_buffer,
|
|
|
|
Buffer_manager *buf_manager_arg) = 0;
|
2010-11-02 22:09:28 +01:00
|
|
|
|
|
|
|
/* Get pointer to place where every get_next() call will put rowid */
|
2010-11-08 13:15:50 +01:00
|
|
|
virtual uchar *get_rowid_ptr() = 0;
|
2010-11-02 22:09:28 +01:00
|
|
|
/* Get the rowid (call this after get_next() call) */
|
2010-12-08 22:47:33 +01:00
|
|
|
virtual void position();
|
2011-03-04 10:06:03 +01:00
|
|
|
virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0;
|
2010-12-08 22:47:33 +01:00
|
|
|
|
|
|
|
virtual void interrupt_read() {}
|
|
|
|
virtual void resume_read() {}
|
2010-10-26 13:35:13 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2010-11-08 13:15:50 +01:00
|
|
|
A "bypass" index reader that just does an index scan. The index scan is done
|
|
|
|
by calling default MRR implementation (i.e. handler::multi_range_read_XXX())
|
|
|
|
functions.
|
2010-10-26 13:35:13 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
class Mrr_simple_index_reader : public Mrr_index_reader
|
|
|
|
{
|
|
|
|
public:
|
2010-12-02 12:10:52 +01:00
|
|
|
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
2010-10-26 13:35:13 +02:00
|
|
|
void *seq_init_param, uint n_ranges,
|
2010-12-02 12:10:52 +01:00
|
|
|
uint mode, Key_parameters *key_par,
|
|
|
|
Lifo_buffer *key_buffer,
|
|
|
|
Buffer_manager *buf_manager_arg);
|
2011-03-04 10:06:03 +01:00
|
|
|
int get_next(range_id_t *range_info);
|
2010-11-25 12:35:21 +01:00
|
|
|
int refill_buffer(bool initial) { return initial? 0: HA_ERR_END_OF_FILE; }
|
2010-12-02 14:21:52 +01:00
|
|
|
uchar *get_rowid_ptr() { return file->ref; }
|
2011-03-04 10:06:03 +01:00
|
|
|
bool skip_record(range_id_t range_id, uchar *rowid)
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
2010-12-02 14:21:52 +01:00
|
|
|
return (file->mrr_funcs.skip_record &&
|
|
|
|
file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid));
|
2010-10-26 13:35:13 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2010-11-02 22:09:28 +01:00
|
|
|
A reader that sorts the key values before it makes the index lookups.
|
2010-10-26 13:35:13 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
class Mrr_ordered_index_reader : public Mrr_index_reader
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|
|
|
void *seq_init_param, uint n_ranges,
|
2010-12-02 12:10:52 +01:00
|
|
|
uint mode, Key_parameters *key_par,
|
|
|
|
Lifo_buffer *key_buffer,
|
|
|
|
Buffer_manager *buf_manager_arg);
|
2011-03-04 10:06:03 +01:00
|
|
|
int get_next(range_id_t *range_info);
|
2010-11-25 12:35:21 +01:00
|
|
|
int refill_buffer(bool initial);
|
2010-12-02 14:21:52 +01:00
|
|
|
uchar *get_rowid_ptr() { return file->ref; }
|
2010-10-26 13:35:13 +02:00
|
|
|
|
2011-03-04 10:06:03 +01:00
|
|
|
bool skip_record(range_id_t range_info, uchar *rowid)
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
|
|
|
return (mrr_funcs.skip_record &&
|
|
|
|
mrr_funcs.skip_record(mrr_iter, range_info, rowid));
|
|
|
|
}
|
2010-11-08 13:15:50 +01:00
|
|
|
|
2011-03-04 10:06:03 +01:00
|
|
|
bool skip_index_tuple(range_id_t range_info)
|
2010-11-08 13:15:50 +01:00
|
|
|
{
|
|
|
|
return (mrr_funcs.skip_index_tuple &&
|
|
|
|
mrr_funcs.skip_index_tuple(mrr_iter, range_info));
|
|
|
|
}
|
2010-12-20 12:40:12 +01:00
|
|
|
|
2011-03-03 22:54:10 +01:00
|
|
|
bool set_interruption_temp_buffer(uint rowid_length, uint key_len,
|
|
|
|
uint saved_pk_len,
|
2010-12-20 12:40:12 +01:00
|
|
|
uchar **space_start, uchar *space_end);
|
|
|
|
void set_no_interruption_temp_buffer();
|
2010-11-08 13:15:50 +01:00
|
|
|
|
2010-12-08 22:47:33 +01:00
|
|
|
void interrupt_read();
|
|
|
|
void resume_read();
|
|
|
|
void position();
|
2010-10-26 13:35:13 +02:00
|
|
|
private:
|
|
|
|
Key_value_records_iterator kv_it;
|
|
|
|
|
|
|
|
bool scanning_key_val_iter;
|
|
|
|
|
|
|
|
/* Buffer to store (key, range_id) pairs */
|
|
|
|
Lifo_buffer *key_buffer;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/* This manages key buffer allocation and sizing for us */
|
2010-10-26 13:35:13 +02:00
|
|
|
Buffer_manager *buf_manager;
|
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
Key_parameters keypar; /* index scan and lookup tuple parameters */
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-10-26 13:35:13 +02:00
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
|
|
|
bool is_mrr_assoc;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/* Range sequence iteration members */
|
2010-10-26 13:35:13 +02:00
|
|
|
RANGE_SEQ_IF mrr_funcs;
|
|
|
|
range_seq_t mrr_iter;
|
2010-11-25 12:35:21 +01:00
|
|
|
|
|
|
|
/* TRUE == reached eof when enumerating ranges */
|
|
|
|
bool source_exhausted;
|
2011-03-03 22:54:10 +01:00
|
|
|
|
2010-12-16 21:43:52 +01:00
|
|
|
/*
|
2011-03-03 22:54:10 +01:00
|
|
|
Following members are for interrupt_read()/resume_read(). The idea is that
|
|
|
|
in some cases index scan that is done by this object is interrupted by
|
|
|
|
rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that
|
|
|
|
we're sharing handler->record[0] with that object, and it destroys its
|
|
|
|
contents.
|
|
|
|
We need to save/restore our current
|
|
|
|
- index tuple (for pushed index condition checks)
|
|
|
|
- clustered primary key values (again, for pushed index condition checks)
|
|
|
|
- rowid of the last record we've retrieved (in case this rowid matches
|
|
|
|
multiple ranges and we'll need to return it again)
|
|
|
|
*/
|
|
|
|
bool support_scan_interruptions;
|
|
|
|
/* Space where we save the rowid of the last record we've returned */
|
2010-12-08 22:47:33 +01:00
|
|
|
uchar *saved_rowid;
|
2011-03-03 22:54:10 +01:00
|
|
|
|
2010-12-16 21:43:52 +01:00
|
|
|
/* TRUE <=> saved_rowid has the last saved rowid */
|
2010-12-08 22:47:33 +01:00
|
|
|
bool have_saved_rowid;
|
2011-03-03 22:54:10 +01:00
|
|
|
|
|
|
|
uchar *saved_key_tuple; /* Saved current key tuple */
|
|
|
|
uchar *saved_primary_key; /* Saved current primary key tuple */
|
2014-10-16 15:57:13 +02:00
|
|
|
|
2014-10-28 23:37:58 +01:00
|
|
|
/*
|
|
|
|
TRUE<=> saved_key_tuple (and saved_primary_key when applicable) have
|
2014-10-16 15:57:13 +02:00
|
|
|
valid values.
|
|
|
|
*/
|
|
|
|
bool read_was_interrupted;
|
2010-12-19 11:56:12 +01:00
|
|
|
|
2010-11-22 17:34:03 +01:00
|
|
|
static int compare_keys(void* arg, uchar* key1, uchar* key2);
|
|
|
|
static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2);
|
2010-10-26 13:35:13 +02:00
|
|
|
|
|
|
|
friend class Key_value_records_iterator;
|
|
|
|
friend class DsMrr_impl;
|
|
|
|
friend class Mrr_ordered_rndpos_reader;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2010-11-02 22:09:28 +01:00
|
|
|
/*
|
|
|
|
A reader that gets rowids from an Mrr_index_reader, and then sorts them
|
|
|
|
before getting full records with handler->rndpos() calls.
|
|
|
|
*/
|
2010-10-26 13:35:13 +02:00
|
|
|
|
2010-10-28 18:48:16 +02:00
|
|
|
class Mrr_ordered_rndpos_reader : public Mrr_reader
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
|
|
|
public:
|
2010-12-02 14:21:52 +01:00
|
|
|
int init(handler *file, Mrr_index_reader *index_reader, uint mode,
|
2020-02-19 07:49:42 +01:00
|
|
|
Lifo_buffer *buf, Rowid_filter *filter);
|
2011-03-04 10:06:03 +01:00
|
|
|
int get_next(range_id_t *range_info);
|
2010-11-25 12:35:21 +01:00
|
|
|
int refill_buffer(bool initial);
|
2010-10-26 13:35:13 +02:00
|
|
|
private:
|
2010-12-02 14:21:52 +01:00
|
|
|
handler *file; /* Handler to use */
|
2010-10-26 13:35:13 +02:00
|
|
|
|
|
|
|
/* This what we get (rowid, range_info) pairs from */
|
|
|
|
Mrr_index_reader *index_reader;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/* index_reader->get_next() puts rowid here */
|
2010-10-26 13:35:13 +02:00
|
|
|
uchar *index_rowid;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/* TRUE <=> index_reader->refill_buffer() call has returned EOF */
|
2010-11-01 11:52:10 +01:00
|
|
|
bool index_reader_exhausted;
|
2010-10-26 13:35:13 +02:00
|
|
|
|
2010-12-15 08:45:08 +01:00
|
|
|
/*
|
|
|
|
TRUE <=> We should call index_reader->refill_buffer(). This happens if
|
|
|
|
1. we've made index_reader->get_next() call which returned EOF
|
|
|
|
2. we haven't made any index_reader calls (and our first call should
|
|
|
|
be index_reader->refill_buffer(initial=TRUE)
|
|
|
|
*/
|
2010-11-25 12:35:21 +01:00
|
|
|
bool index_reader_needs_refill;
|
2010-12-15 08:45:08 +01:00
|
|
|
|
2010-10-26 13:35:13 +02:00
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
|
|
|
bool is_mrr_assoc;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
When reading from ordered rowid buffer: the rowid element of the last
|
|
|
|
buffer element that has rowid identical to this one.
|
|
|
|
*/
|
2010-10-26 13:35:13 +02:00
|
|
|
uchar *last_identical_rowid;
|
2010-11-08 13:15:50 +01:00
|
|
|
|
|
|
|
/* Buffer to store (rowid, range_id) pairs */
|
2010-10-26 13:35:13 +02:00
|
|
|
Lifo_buffer *rowid_buffer;
|
|
|
|
|
2020-02-19 07:49:42 +01:00
|
|
|
/* Rowid filter to be checked against (if any) */
|
|
|
|
Rowid_filter *rowid_filter;
|
|
|
|
|
2010-11-25 12:35:21 +01:00
|
|
|
int refill_from_index_reader();
|
2010-10-26 13:35:13 +02:00
|
|
|
};
|
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2010-11-08 13:15:50 +01:00
|
|
|
/*
|
|
|
|
A primitive "factory" of various Mrr_*_reader classes (the point is to
|
|
|
|
get various kinds of readers without having to allocate them on the heap)
|
|
|
|
*/
|
|
|
|
|
2010-10-28 18:48:16 +02:00
|
|
|
class Mrr_reader_factory
|
2010-10-26 13:35:13 +02:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
Mrr_ordered_rndpos_reader ordered_rndpos_reader;
|
|
|
|
Mrr_ordered_index_reader ordered_index_reader;
|
|
|
|
Mrr_simple_index_reader simple_index_reader;
|
|
|
|
};
|
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
|
2011-04-02 12:04:45 +02:00
|
|
|
#define DSMRR_IMPL_SORT_KEYS HA_MRR_IMPLEMENTATION_FLAG1
|
|
|
|
#define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2
|
|
|
|
|
2010-06-22 19:24:22 +02:00
|
|
|
/*
|
|
|
|
DS-MRR implementation for one table. Create/use one object of this class for
|
|
|
|
each ha_{myisam/innobase/etc} object. That object will be further referred to
|
|
|
|
as "the handler"
|
|
|
|
|
2010-09-20 21:13:28 +02:00
|
|
|
DsMrr_impl has the following execution strategies:
|
|
|
|
|
|
|
|
- Bypass DS-MRR, pass all calls to default MRR implementation, which is
|
|
|
|
an MRR-to-non-MRR call converter.
|
|
|
|
- Key-Ordered Retrieval
|
|
|
|
- Rowid-Ordered Retrieval
|
|
|
|
|
2010-09-28 10:19:50 +02:00
|
|
|
DsMrr_impl will use one of the above strategies, or a combination of them,
|
2010-09-20 21:13:28 +02:00
|
|
|
according to the following diagram:
|
|
|
|
|
|
|
|
(mrr function calls)
|
|
|
|
|
|
|
|
|
+----------------->-----------------+
|
|
|
|
| |
|
|
|
|
___________v______________ _______________v________________
|
|
|
|
/ default: use lookup keys \ / KEY-ORDERED RETRIEVAL: \
|
|
|
|
| (or ranges) in whatever | | sort lookup keys and then make |
|
|
|
|
| order they are supplied | | index lookups in index order |
|
|
|
|
\__________________________/ \________________________________/
|
|
|
|
| | | | |
|
|
|
|
+---<---+ | +--------------->-----------|----+
|
|
|
|
| | | |
|
|
|
|
| | +---------------+ |
|
|
|
|
| ______v___ ______ | _______________v_______________
|
|
|
|
| / default: read \ | / ROWID-ORDERED RETRIEVAL: \
|
|
|
|
| | table records | | | Before reading table records, |
|
|
|
|
v | in random order | v | sort their rowids and then |
|
|
|
|
| \_________________/ | | read them in rowid order |
|
|
|
|
| | | \_______________________________/
|
|
|
|
| | | |
|
|
|
|
| | | |
|
|
|
|
+-->---+ | +----<------+-----------<--------+
|
|
|
|
| | |
|
|
|
|
v v v
|
|
|
|
(table records and range_ids)
|
|
|
|
|
|
|
|
The choice of strategy depends on MRR scan properties, table properties
|
2010-09-28 10:19:50 +02:00
|
|
|
(whether we're scanning clustered primary key), and @@optimizer_switch
|
2010-09-20 21:13:28 +02:00
|
|
|
settings.
|
|
|
|
|
|
|
|
Key-Ordered Retrieval
|
|
|
|
---------------------
|
|
|
|
The idea is: if MRR scan is essentially a series of lookups on
|
|
|
|
|
|
|
|
tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN
|
|
|
|
|
|
|
|
then it makes sense to collect and order the set of lookup values, i.e.
|
|
|
|
|
|
|
|
sort(value1, value2, .. valueN)
|
|
|
|
|
|
|
|
and then do index lookups in index order. This results in fewer index page
|
|
|
|
fetch operations, and we also can avoid making multiple index lookups for the
|
|
|
|
same value. That is, if value1=valueN we can easily discover that after
|
|
|
|
sorting and make one index lookup for them instead of two.
|
|
|
|
|
|
|
|
Rowid-Ordered Retrieval
|
|
|
|
-----------------------
|
|
|
|
If we do a regular index scan or a series of index lookups, we'll be hitting
|
|
|
|
table records at random. For disk-based engines, this is much slower than
|
|
|
|
reading the same records in disk order. We assume that disk ordering of
|
|
|
|
rows is the same as ordering of their rowids (which is provided by
|
|
|
|
handler::cmp_ref())
|
|
|
|
In order to retrieve records in different order, we must separate index
|
|
|
|
scanning and record fetching, that is, MRR scan uses the following steps:
|
2010-06-22 19:24:22 +02:00
|
|
|
|
|
|
|
1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
|
2010-09-20 21:13:28 +02:00
|
|
|
fill a buffer with {rowid, range_id} pairs
|
|
|
|
2. Sort the buffer by rowid value
|
2010-06-22 19:24:22 +02:00
|
|
|
3. for each {rowid, range_id} pair in the buffer
|
|
|
|
get record by rowid and return the {record, range_id} pair
|
|
|
|
4. Repeat the above steps until we've exhausted the list of ranges we're
|
|
|
|
scanning.
|
2010-10-26 13:35:13 +02:00
|
|
|
|
|
|
|
Buffer space management considerations
|
|
|
|
--------------------------------------
|
|
|
|
With regards to buffer/memory management, MRR interface specifies that
|
|
|
|
- SQL layer provides multi_range_read_init() with buffer of certain size.
|
|
|
|
- MRR implementation may use (i.e. have at its disposal till the end of
|
|
|
|
the MRR scan) all of the buffer, or return the unused end of the buffer
|
|
|
|
to SQL layer.
|
|
|
|
|
|
|
|
DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
|
|
|
|
we need to accumulate/sort only keys (or only rowids), it is fairly trivial.
|
|
|
|
|
|
|
|
When we need to accumulate/sort both keys and rowids, efficient buffer use
|
|
|
|
gets complicated. We need to:
|
|
|
|
- First, accumulate keys and sort them
|
|
|
|
- Then use the keys (smaller values go first) to obtain rowids. A key is not
|
|
|
|
needed after we've got matching rowids for it.
|
|
|
|
- Make sure that rowids are accumulated at the front of the buffer, so that we
|
|
|
|
can return the end part of the buffer to SQL layer, should there be too
|
|
|
|
few rowid values to occupy the buffer.
|
|
|
|
|
|
|
|
All of these goals are achieved by using the following scheme:
|
|
|
|
|
|
|
|
| | We get an empty buffer from SQL layer.
|
|
|
|
|
|
|
|
| *-|
|
|
|
|
| *----| First, we fill the buffer with keys. Key_buffer
|
|
|
|
| *-------| part grows from end of the buffer space to start
|
|
|
|
| *----------| (In this picture, the buffer is big enough to
|
|
|
|
| *-------------| accommodate all keys and even have some space left)
|
|
|
|
|
|
|
|
| *=============| We want to do key-ordered index scan, so we sort
|
|
|
|
the keys
|
|
|
|
|
|
|
|
|-x *===========| Then we use the keys to get rowids. Rowids are
|
|
|
|
|----x *========| stored from start of buffer space towards the end.
|
|
|
|
|--------x *=====| The part of the buffer occupied with keys
|
|
|
|
|------------x *===| gradually frees up space for rowids. In this
|
|
|
|
|--------------x *=| picture we run out of keys before we've run out
|
|
|
|
|----------------x | of buffer space (it can be other way as well).
|
|
|
|
|
|
|
|
|================x | Then we sort the rowids.
|
|
|
|
|
|
|
|
| |~~~| The unused part of the buffer is at the end, so
|
|
|
|
we can return it to the SQL layer.
|
|
|
|
|
|
|
|
|================* Sorted rowids are then used to read table records
|
|
|
|
in disk order
|
|
|
|
|
2009-12-15 08:16:46 +01:00
|
|
|
*/
|
|
|
|
|
2010-12-02 12:10:52 +01:00
|
|
|
class DsMrr_impl
|
2009-12-15 08:16:46 +01:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef void (handler::*range_check_toggle_func_t)(bool on);
|
|
|
|
|
|
|
|
DsMrr_impl()
|
2020-02-20 11:35:19 +01:00
|
|
|
: secondary_file(NULL),
|
|
|
|
rowid_filter(NULL) {};
|
2009-12-15 08:16:46 +01:00
|
|
|
|
2010-06-22 19:24:22 +02:00
|
|
|
void init(handler *h_arg, TABLE *table_arg)
|
|
|
|
{
|
2010-12-02 14:21:52 +01:00
|
|
|
primary_file= h_arg;
|
2010-06-22 19:24:22 +02:00
|
|
|
table= table_arg;
|
|
|
|
}
|
2010-12-02 14:21:52 +01:00
|
|
|
int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
|
|
|
|
void *seq_init_param, uint n_ranges, uint mode,
|
|
|
|
HANDLER_BUFFER *buf);
|
2010-06-22 19:24:22 +02:00
|
|
|
void dsmrr_close();
|
2011-03-04 10:06:03 +01:00
|
|
|
int dsmrr_next(range_id_t *range_info);
|
2010-06-22 19:24:22 +02:00
|
|
|
|
|
|
|
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
|
2012-08-01 16:27:34 +02:00
|
|
|
uint *bufsz, uint *flags, Cost_estimate *cost);
|
2010-06-22 19:24:22 +02:00
|
|
|
|
|
|
|
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
|
|
|
|
void *seq_init_param, uint n_ranges, uint *bufsz,
|
2012-08-01 16:27:34 +02:00
|
|
|
uint *flags, Cost_estimate *cost);
|
2011-04-02 12:04:45 +02:00
|
|
|
|
|
|
|
int dsmrr_explain_info(uint mrr_mode, char *str, size_t size);
|
2010-06-22 19:24:22 +02:00
|
|
|
private:
|
2010-10-26 13:35:13 +02:00
|
|
|
/* Buffer to store (key, range_id) pairs */
|
|
|
|
Lifo_buffer *key_buffer;
|
|
|
|
|
2009-12-15 08:16:46 +01:00
|
|
|
/*
|
2010-09-20 11:02:17 +02:00
|
|
|
The "owner" handler object (the one that is expected to "own" this object
|
|
|
|
and call its functions).
|
2009-12-15 08:16:46 +01:00
|
|
|
*/
|
2010-12-02 14:21:52 +01:00
|
|
|
handler *primary_file;
|
|
|
|
TABLE *table; /* Always equal to primary_file->table */
|
2010-06-22 19:24:22 +02:00
|
|
|
|
2010-09-18 23:05:47 +02:00
|
|
|
/*
|
2010-09-20 11:02:17 +02:00
|
|
|
Secondary handler object. (created when needed, we need it when we need
|
2010-09-28 10:19:50 +02:00
|
|
|
to run both index scan and rnd_pos() scan at the same time)
|
2010-09-18 23:05:47 +02:00
|
|
|
*/
|
2010-12-02 14:21:52 +01:00
|
|
|
handler *secondary_file;
|
2020-02-20 11:35:19 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
The rowid filter that DS-MRR has "unpushed" from the storage engine.
|
|
|
|
If it's present, DS-MRR will use it.
|
|
|
|
*/
|
|
|
|
Rowid_filter *rowid_filter;
|
|
|
|
|
2010-09-20 11:02:17 +02:00
|
|
|
uint keyno; /* index we're running the scan on */
|
|
|
|
/* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
|
|
|
|
bool is_mrr_assoc;
|
|
|
|
|
2010-11-02 21:25:35 +01:00
|
|
|
Mrr_reader_factory reader_factory;
|
2010-11-25 22:30:39 +01:00
|
|
|
|
2010-10-28 18:48:16 +02:00
|
|
|
Mrr_reader *strategy;
|
2010-11-25 22:30:39 +01:00
|
|
|
bool strategy_exhausted;
|
|
|
|
|
2010-10-26 13:35:13 +02:00
|
|
|
Mrr_index_reader *index_strategy;
|
2010-09-20 11:02:17 +02:00
|
|
|
|
|
|
|
/* The whole buffer space that we're using */
|
2010-08-08 09:13:54 +02:00
|
|
|
uchar *full_buf;
|
|
|
|
uchar *full_buf_end;
|
2010-08-12 19:18:41 +02:00
|
|
|
|
2010-09-20 11:02:17 +02:00
|
|
|
/*
|
2010-09-28 10:19:50 +02:00
|
|
|
When using both rowid and key buffers: the boundary between key and rowid
|
2010-09-20 11:02:17 +02:00
|
|
|
parts of the buffer. This is the "original" value, actual memory ranges
|
|
|
|
used by key and rowid parts may be different because of dynamic space
|
|
|
|
reallocation between them.
|
|
|
|
*/
|
2010-08-12 19:18:41 +02:00
|
|
|
uchar *rowid_buffer_end;
|
2010-09-20 11:02:17 +02:00
|
|
|
|
2010-09-21 18:19:54 +02:00
|
|
|
/*
|
|
|
|
One of the following two is used for key buffer: forward is used when
|
|
|
|
we only need key buffer, backward is used when we need both key and rowid
|
|
|
|
buffers.
|
|
|
|
*/
|
|
|
|
Forward_lifo_buffer forward_key_buf;
|
|
|
|
Backward_lifo_buffer backward_key_buf;
|
|
|
|
|
2010-09-20 11:02:17 +02:00
|
|
|
/*
|
|
|
|
Buffer to store (rowid, range_id) pairs, or just rowids if
|
|
|
|
is_mrr_assoc==FALSE
|
|
|
|
*/
|
2010-11-02 21:25:35 +01:00
|
|
|
Forward_lifo_buffer rowid_buffer;
|
2010-09-20 11:02:17 +02:00
|
|
|
|
2009-12-15 08:16:46 +01:00
|
|
|
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
|
2012-08-01 16:27:34 +02:00
|
|
|
Cost_estimate *cost);
|
2019-11-15 21:37:28 +01:00
|
|
|
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
|
|
|
|
uint *buffer_size, uint extra_mem_overhead,
|
|
|
|
Cost_estimate *cost);
|
2013-09-20 12:47:38 +02:00
|
|
|
bool check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno, uint mrr_flags);
|
2010-08-08 09:13:54 +02:00
|
|
|
|
2010-12-02 12:10:52 +01:00
|
|
|
bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map);
|
|
|
|
|
|
|
|
/* Buffer_manager and its member functions */
|
|
|
|
Buffer_manager buf_manager;
|
|
|
|
static void redistribute_buffer_space(void *dsmrr_arg);
|
|
|
|
static void reset_buffer_sizes(void *dsmrr_arg);
|
|
|
|
static void do_nothing(void *dsmrr_arg);
|
|
|
|
|
2010-10-26 13:35:13 +02:00
|
|
|
Lifo_buffer* get_key_buffer() { return key_buffer; }
|
|
|
|
|
2010-10-03 12:48:42 +02:00
|
|
|
friend class Key_value_records_iterator;
|
2010-10-26 13:35:13 +02:00
|
|
|
friend class Mrr_ordered_index_reader;
|
|
|
|
friend class Mrr_ordered_rndpos_reader;
|
|
|
|
|
|
|
|
int setup_two_handlers();
|
|
|
|
void close_second_handler();
|
2009-12-15 08:16:46 +01:00
|
|
|
};
|
|
|
|
|
2010-09-28 10:19:50 +02:00
|
|
|
/**
|
|
|
|
@} (end of group DS-MRR declarations)
|
|
|
|
*/
|
2010-09-20 11:02:17 +02:00
|
|
|
|