mariadb/sql/opt_histogram_json.h
2022-01-19 18:10:10 +03:00

128 lines
3.8 KiB
C++

/*
Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
#include "sql_statistics.h"
/*
An equi-height histogram which stores real values for bucket bounds.
Handles @@histogram_type=JSON_HB
Histogram format in JSON:
{
"histogram_hb_v2": [
{ "start": "value", "size":nnn.nn, "ndv": nnn },
...
{ "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"}
]
}
The histogram is an object with a single member named "histogram_hb_v2".
The value of that member is an array of buckets.
Each bucket is an object with these members:
"start" - the first value in the bucket.
"size" - fraction of table rows that is contained in the bucket.
"ndv" - Number of Distinct Values in the bucket.
"end" - Optionally, the last value in the bucket.
A bucket is a single-point bucket if it has ndv=1.
Most buckets have no "end" member: the bucket is assumed to contain all
values up to the "start" of the next bucket.
The exceptions are the last bucket (see the format above) and single-point
buckets, where the last value is the same as the first value.
*/
class Histogram_json_hb : public Histogram_base
{
private:
  /* Bucket count of the histogram; reported by get_width() and get_size(). */
  size_t size;

  /* JSON form of the collected histogram. Used at collection time only. */
  std::string json_text;

  /* A single histogram bucket. */
  struct Bucket
  {
    /*
      Left endpoint, in KeyTupleFormat. The endpoint is inclusive: the value
      itself belongs to this bucket.
    */
    std::string start_value;

    /*
      Cumulative fraction: the share of table rows that fall into this bucket
      and all the buckets preceding it.
    */
    double cum_fract;

    /* How many distinct values the bucket holds. */
    longlong ndv;
  };

  std::vector<Bucket> buckets;

  /*
    NOTE(review): presumably the right endpoint of the last bucket -- confirm
    against the implementation.
  */
  std::string last_bucket_end_endp;

public:
  /* Name of the top-level member in the JSON representation. */
  static constexpr const char *JSON_NAME= "histogram_hb_v2";

  /* Histogram_base overrides that are only declared in this header. */
  bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
             const char *hist_data, size_t hist_data_len) override;

  void serialize(Field *field) override;

  Histogram_builder *create_builder(Field *col, uint col_len,
                                    ha_rows rows) override;

  /* Number of buckets in the histogram. */
  uint get_width() override
  {
    return static_cast<uint>(size);
  }

  /* This class always reports the JSON_HB histogram type. */
  Histogram_type get_type() override
  {
    return JSON_HB;
  }

  /*
    @brief
      Historically this was the on-disk size of the histogram. That was
      redundant (the size can be checked directly), so the number of buckets
      is returned instead.
  */
  uint get_size() override
  {
    return static_cast<uint>(size);
  }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  double point_selectivity(Field *field, key_range *endpoint,
                           double avg_selection) override;

  double range_selectivity(Field *field, key_range *min_endp,
                           key_range *max_endp) override;

  /*
    Record the bucket count and keep a private copy of the JSON text.

    @param sz             stored as the histogram size (number of buckets)
    @param json_text_arg  start of the JSON text to copy
    @param json_text_len  number of bytes to copy from json_text_arg
  */
  void set_json_text(ulonglong sz, const char *json_text_arg,
                     size_t json_text_len)
  {
    size= static_cast<size_t>(sz);
    json_text.assign(json_text_arg, json_text_len);
  }

private:
  /* Internal helpers; their definitions are not part of this header. */
  double get_left_fract(int idx);
  std::string& get_end_value(int idx);
  int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
};