mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 01:04:19 +01:00
d5e15424d8
The MDEV-29693 conflict resolution is from Monty, as well as is a bug fix where ANALYZE TABLE wrongly built histograms for single-column PRIMARY KEY. Also includes a fix for safe_malloc error reporting. Other things: - Copied main.log_slow from 10.4 to avoid mtr issue Disabled test: - spider/bugfix.mdev_27239 because we started to get +Error 1429 Unable to connect to foreign data source: localhost -Error 1158 Got an error reading communication packets - main.delayed - Bug#54332 Deadlock with two connections doing LOCK TABLE+INSERT DELAYED This part is disabled for now as it fails randomly with different warnings/errors (no corruption).
147 lines
4.5 KiB
C++
147 lines
4.5 KiB
C++
/*
|
|
Copyright (c) 2021, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
#include "sql_statistics.h"
|
|
|
|
/*
|
|
An equi-height histogram which stores real values for bucket bounds.
|
|
|
|
Handles @@histogram_type=JSON_HB
|
|
|
|
Histogram format in JSON:
|
|
|
|
{
|
|
// The next three are saved but not currently analyzed:
|
|
"target_histogram_size": nnn,
|
|
"collected_at": "(date and time)",
|
|
"collected_by": "(server version)",
|
|
|
|
"histogram_hb": [
|
|
{ "start": "value", "size":nnn.nn, "ndv": nnn },
|
|
...
|
|
|
|
// Optionally, start and/or end can be replaced with _hex variant
|
|
{ "start_hex: "value", "size":nnn.nn, "ndv":nnn},
|
|
|
|
...
|
|
{ "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"},
|
|
]
|
|
}
|
|
|
|
Histogram is a JSON object. It has some global properties and "histogram_hb"
|
|
member whose value is a JSON array of histogram buckets.
|
|
|
|
Each bucket is an object with these members:
|
|
"start" - the first value in the bucket.
|
|
"size" - fraction of table rows that is contained in the bucket.
|
|
"ndv" - Number of Distinct Values in the bucket.
|
|
"end" - Optionally, the last value in the bucket.
|
|
|
|
A bucket is a single-point bucket if it has ndv=1.
|
|
|
|
Most buckets have no "end" member: the bucket is assumed to contain all
|
|
values up to the "start" of the next bucket.
|
|
|
|
The exception is single-point buckets where last value is the same as the
|
|
first value.
|
|
|
|
start/end can be replaced with start_hex/end_hex. In _hex variant, the
|
|
constant is encoded in hex. This encoding is used to handle so called
|
|
"unassigned characters": some non-UTF8 charsets have byte combinations that
|
|
are not mapped to any UTF8 character.
|
|
*/
|
|
|
|
class Histogram_json_hb final : public Histogram_base
|
|
{
|
|
size_t size; /* Number of elements in the histogram */
|
|
|
|
/* Collection-time only: collected histogram in the JSON form. */
|
|
std::string json_text;
|
|
|
|
struct Bucket
|
|
{
|
|
// The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
|
|
// value is in this bucket.
|
|
std::string start_value;
|
|
|
|
// Cumulative fraction: The fraction of table rows that fall into this
|
|
// and preceding buckets.
|
|
double cum_fract;
|
|
|
|
// Number of distinct values in the bucket.
|
|
longlong ndv;
|
|
};
|
|
|
|
std::vector<Bucket> buckets;
|
|
|
|
std::string last_bucket_end_endp;
|
|
|
|
public:
|
|
static constexpr const char* JSON_NAME="histogram_hb";
|
|
|
|
bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
|
|
Field *field, const char *hist_data,
|
|
size_t hist_data_len) override;
|
|
|
|
void serialize(Field *field) override;
|
|
|
|
Histogram_builder *create_builder(Field *col, uint col_len,
|
|
ha_rows rows) override;
|
|
|
|
// returns number of buckets in the histogram
|
|
uint get_width() override
|
|
{
|
|
return (uint)size;
|
|
}
|
|
|
|
Histogram_type get_type() override
|
|
{
|
|
return JSON_HB;
|
|
}
|
|
|
|
/*
|
|
@brief
|
|
This used to be the size of the histogram on disk, which was redundant
|
|
(one can check the size directly). Return the number of buckets instead.
|
|
*/
|
|
uint get_size() override
|
|
{
|
|
return (uint)size;
|
|
}
|
|
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
|
|
ulonglong size) override;
|
|
|
|
double point_selectivity(Field *field, key_range *endpoint,
|
|
double avg_sel) override;
|
|
double range_selectivity(Field *field, key_range *min_endp,
|
|
key_range *max_endp, double avg_sel) override;
|
|
|
|
void set_json_text(ulonglong sz, const char *json_text_arg,
|
|
size_t json_text_len)
|
|
{
|
|
size= (size_t) sz;
|
|
json_text.assign(json_text_arg, json_text_len);
|
|
}
|
|
|
|
private:
|
|
int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
|
|
bool *assigned_last_end, const char **err);
|
|
|
|
double get_left_fract(int idx);
|
|
std::string& get_end_value(int idx);
|
|
int find_bucket(const Field *field, const uchar *lookup_val, int *cmp);
|
|
};
|
|
|