2012-09-01 23:21:59 +02:00
|
|
|
/* Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2019-05-11 21:19:05 +02:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
2012-09-01 23:21:59 +02:00
|
|
|
|
|
|
|
#ifndef FILESORT_UTILS_INCLUDED
|
|
|
|
#define FILESORT_UTILS_INCLUDED
|
|
|
|
|
|
|
|
#include "my_base.h"
|
|
|
|
#include "sql_array.h"
|
|
|
|
|
|
|
|
class Sort_param;
|
|
|
|
/*
|
|
|
|
Calculate cost of merge sort
|
|
|
|
|
|
|
|
@param num_rows Total number of rows.
|
|
|
|
@param num_keys_per_buffer Number of keys per buffer.
|
|
|
|
@param elem_size Size of each element.
|
|
|
|
|
|
|
|
Calculates cost of merge sort by simulating call to merge_many_buff().
|
|
|
|
|
|
|
|
@retval
|
|
|
|
Computed cost of merge sort in disk seeks.
|
|
|
|
|
|
|
|
@note
|
|
|
|
Declared here in order to be able to unit test it,
|
|
|
|
since library dependencies have not been sorted out yet.
|
|
|
|
|
|
|
|
See also comments get_merge_many_buffs_cost().
|
|
|
|
*/
|
|
|
|
|
|
|
|
double get_merge_many_buffs_cost_fast(ha_rows num_rows,
|
|
|
|
ha_rows num_keys_per_buffer,
|
|
|
|
uint elem_size);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
A wrapper class around the buffer used by filesort().
|
2020-01-20 21:07:47 +01:00
|
|
|
The sort buffer is a contiguous chunk of memory,
|
|
|
|
containing both records to be sorted, and pointers to said records:
|
|
|
|
|
|
|
|
<start of buffer | still unused | end of buffer>
|
|
|
|
|rec 0|record 1 |rec 2| ............ |ptr to rec2|ptr to rec1|ptr to rec0|
|
|
|
|
|
|
|
|
Records will be inserted "left-to-right". Records are not necessarily
|
|
|
|
fixed-size, they can be packed and stored without any "gaps".
|
|
|
|
|
|
|
|
Record pointers will be inserted "right-to-left", as a side-effect
|
|
|
|
of inserting the actual records.
|
2012-09-01 23:21:59 +02:00
|
|
|
|
|
|
|
We wrap the buffer in order to be able to do lazy initialization of the
|
|
|
|
pointers: the buffer is often much larger than what we actually need.
|
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
With this allocation scheme, and lazy initialization of the pointers,
|
|
|
|
we are able to pack variable-sized records in the buffer,
|
|
|
|
and thus possibly have space for more records than we initially estimated.
|
|
|
|
|
2012-09-01 23:21:59 +02:00
|
|
|
The buffer must be kept available for multiple executions of the
|
|
|
|
same sort operation, so we have explicit allocate and free functions,
|
|
|
|
rather than doing alloc/free in CTOR/DTOR.
|
|
|
|
*/
|
2020-01-20 21:07:47 +01:00
|
|
|
|
2012-09-01 23:21:59 +02:00
|
|
|
class Filesort_buffer
|
|
|
|
{
|
|
|
|
public:
|
2020-01-20 21:07:47 +01:00
|
|
|
Filesort_buffer() :
|
|
|
|
m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL),
|
|
|
|
m_sort_keys(NULL),
|
|
|
|
m_num_records(0), m_record_length(0),
|
|
|
|
m_sort_length(0),
|
|
|
|
m_size_in_bytes(0), m_idx(0)
|
2012-09-01 23:21:59 +02:00
|
|
|
{}
|
2020-01-20 21:07:47 +01:00
|
|
|
|
|
|
|
/** Sort me... */
|
|
|
|
void sort_buffer(const Sort_param *param, uint count);
|
|
|
|
|
|
|
|
/**
|
|
|
|
Reverses the record pointer array, to avoid recording new results for
|
|
|
|
non-deterministic mtr tests.
|
|
|
|
*/
|
|
|
|
void reverse_record_pointers()
|
2016-03-22 20:51:59 +01:00
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
if (m_idx < 2) // There is nothing to swap.
|
|
|
|
return;
|
|
|
|
uchar **keys= get_sort_keys();
|
|
|
|
const longlong count= m_idx - 1;
|
|
|
|
for (longlong ix= 0; ix <= count/2; ++ix)
|
|
|
|
{
|
|
|
|
uchar *tmp= keys[count - ix];
|
|
|
|
keys[count - ix] = keys[ix];
|
|
|
|
keys[ix]= tmp;
|
|
|
|
}
|
2016-03-22 20:51:59 +01:00
|
|
|
}
|
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
/**
|
|
|
|
Initializes all the record pointers.
|
|
|
|
*/
|
|
|
|
void init_record_pointers()
|
2016-03-22 20:51:59 +01:00
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
init_next_record_pointer();
|
|
|
|
while (m_idx < m_num_records)
|
|
|
|
(void) get_next_record_pointer();
|
|
|
|
reverse_record_pointers();
|
2016-03-22 20:51:59 +01:00
|
|
|
}
|
2020-01-20 21:07:47 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
Prepares the buffer for the next batch of records to process.
|
|
|
|
*/
|
|
|
|
void init_next_record_pointer()
|
2016-03-22 20:51:59 +01:00
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
m_idx= 0;
|
|
|
|
m_next_rec_ptr= m_rawmem;
|
|
|
|
m_sort_keys= NULL;
|
2016-03-22 20:51:59 +01:00
|
|
|
}
|
2012-09-01 23:21:59 +02:00
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
/**
|
|
|
|
@returns the number of bytes currently in use for data.
|
|
|
|
*/
|
|
|
|
size_t space_used_for_data() const
|
|
|
|
{
|
|
|
|
return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0;
|
|
|
|
}
|
2012-09-01 23:21:59 +02:00
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
/**
|
|
|
|
@returns the number of bytes left in the buffer.
|
|
|
|
*/
|
|
|
|
size_t spaceleft() const
|
2012-09-01 23:21:59 +02:00
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
DBUG_ASSERT(m_next_rec_ptr >= m_rawmem);
|
|
|
|
const size_t spaceused=
|
|
|
|
(m_next_rec_ptr - m_rawmem) +
|
|
|
|
(static_cast<size_t>(m_idx) * sizeof(uchar*));
|
|
|
|
return m_size_in_bytes - spaceused;
|
2012-09-01 23:21:59 +02:00
|
|
|
}
|
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
/**
|
|
|
|
Is the buffer full?
|
|
|
|
*/
|
|
|
|
bool isfull() const
|
|
|
|
{
|
|
|
|
if (m_idx < m_num_records)
|
|
|
|
return false;
|
|
|
|
return spaceleft() < (m_record_length + sizeof(uchar*));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Where should the next record be stored?
|
|
|
|
*/
|
|
|
|
uchar *get_next_record_pointer()
|
|
|
|
{
|
|
|
|
uchar *retval= m_next_rec_ptr;
|
|
|
|
// Save the return value in the record pointer array.
|
|
|
|
m_record_pointers[-m_idx]= m_next_rec_ptr;
|
|
|
|
// Prepare for the subsequent request.
|
|
|
|
m_idx++;
|
|
|
|
m_next_rec_ptr+= m_record_length;
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Adjusts for actual record length. get_next_record_pointer() above was
|
|
|
|
pessimistic, and assumed that the record could not be packed.
|
|
|
|
*/
|
|
|
|
void adjust_next_record_pointer(uint val)
|
2012-09-01 23:21:59 +02:00
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
m_next_rec_ptr-= (m_record_length - val);
|
2012-09-01 23:21:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns total size: pointer array + record buffers.
|
|
|
|
size_t sort_buffer_size() const
|
|
|
|
{
|
2020-01-20 21:07:47 +01:00
|
|
|
return m_size_in_bytes;
|
2012-09-01 23:21:59 +02:00
|
|
|
}
|
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
bool is_allocated() const
|
|
|
|
{
|
|
|
|
return m_rawmem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Allocates the buffer, but does *not* initialize pointers.
|
|
|
|
Total size = (num_records * record_length) + (num_records * sizeof(pointer))
|
|
|
|
space for records space for pointer to records
|
|
|
|
Caller is responsible for raising an error if allocation fails.
|
|
|
|
|
|
|
|
@param num_records Number of records.
|
|
|
|
@param record_length (maximum) size of each record.
|
|
|
|
@returns Pointer to allocated area, or NULL in case of out-of-memory.
|
|
|
|
*/
|
|
|
|
uchar *alloc_sort_buffer(uint num_records, uint record_length);
|
2012-09-01 23:21:59 +02:00
|
|
|
|
|
|
|
/// Frees the buffer.
|
|
|
|
void free_sort_buffer();
|
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
void reset()
|
|
|
|
{
|
|
|
|
m_rawmem= NULL;
|
|
|
|
}
|
|
|
|
/**
|
|
|
|
Used to access the "right-to-left" array of record pointers as an ordinary
|
|
|
|
"left-to-right" array, so that we can pass it directly on to std::sort().
|
|
|
|
*/
|
|
|
|
uchar **get_sort_keys()
|
|
|
|
{
|
|
|
|
if (m_idx == 0)
|
|
|
|
return NULL;
|
|
|
|
return &m_record_pointers[1 - m_idx];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Gets sorted record number ix. @see get_sort_keys()
|
|
|
|
Only valid after buffer has been sorted!
|
|
|
|
*/
|
|
|
|
uchar *get_sorted_record(uint ix)
|
|
|
|
{
|
|
|
|
return m_sort_keys[ix];
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
@returns The entire buffer, as a character array.
|
|
|
|
This is for reusing the memory for merge buffers.
|
|
|
|
*/
|
|
|
|
Bounds_checked_array<uchar> get_raw_buf()
|
|
|
|
{
|
|
|
|
return Bounds_checked_array<uchar>(m_rawmem, m_size_in_bytes);
|
|
|
|
}
|
2012-09-01 23:21:59 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
We need an assignment operator, see filesort().
|
|
|
|
This happens to have the same semantics as the one that would be
|
2023-02-07 12:57:20 +01:00
|
|
|
generated by the compiler.
|
|
|
|
Note that this is a shallow copy. We have two objects sharing the same
|
|
|
|
array.
|
2012-09-01 23:21:59 +02:00
|
|
|
*/
|
2023-02-07 12:57:20 +01:00
|
|
|
Filesort_buffer &operator=(const Filesort_buffer &rhs) = default;
|
2012-09-01 23:21:59 +02:00
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
uint get_sort_length() const { return m_sort_length; }
|
|
|
|
void set_sort_length(uint val) { m_sort_length= val; }
|
|
|
|
|
2012-09-01 23:21:59 +02:00
|
|
|
private:
|
2020-01-20 21:07:47 +01:00
|
|
|
uchar *m_next_rec_ptr; /// The next record will be inserted here.
|
|
|
|
uchar *m_rawmem; /// The raw memory buffer.
|
|
|
|
uchar **m_record_pointers; /// The "right-to-left" array of record pointers.
|
|
|
|
uchar **m_sort_keys; /// Caches the value of get_sort_keys()
|
|
|
|
uint m_num_records; /// Saved value from alloc_sort_buffer()
|
|
|
|
uint m_record_length; /// Saved value from alloc_sort_buffer()
|
|
|
|
uint m_sort_length; /// The length of the sort key.
|
|
|
|
size_t m_size_in_bytes; /// Size of raw buffer, in bytes.
|
2012-09-01 23:21:59 +02:00
|
|
|
|
2020-01-20 21:07:47 +01:00
|
|
|
/**
|
|
|
|
This is the index in the "right-to-left" array of the next record to
|
|
|
|
be inserted into the buffer. It is signed, because we use it in signed
|
|
|
|
expressions like:
|
|
|
|
m_record_pointers[-m_idx];
|
|
|
|
It is longlong rather than int, to ensure that it covers UINT_MAX32
|
|
|
|
without any casting/warning.
|
|
|
|
*/
|
|
|
|
longlong m_idx;
|
2012-09-01 23:21:59 +02:00
|
|
|
};
|
|
|
|
|
2024-10-26 16:17:03 +02:00
|
|
|
int compare_packed_sort_keys(void *sort_param, const void *a_ptr,
|
|
|
|
const void *b_ptr);
|
|
|
|
qsort_cmp2 get_packed_keys_compare_ptr();
|
2020-03-10 00:26:38 +01:00
|
|
|
|
2012-09-01 23:21:59 +02:00
|
|
|
#endif // FILESORT_UTILS_INCLUDED
|