mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
Fixed bug mdev-4369.
The function merge_walk() was adjusted so that it can aggregate the counters of merged elements. Before this change it was not possible to guarantee the correctness of the counters passed to the call-back parameter walk_action. As a result, when some elements of a Unique object were flushed to disk, the function passed to merge_walk() as the call-back parameter could be given wrong element counters. This could lead to building wrong histograms.
This commit is contained in:
parent
1c30fb2a15
commit
10f0530b22
5 changed files with 121 additions and 6 deletions
|
@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF
|
|||
set histogram_size=default;
|
||||
set histogram_type=default;
|
||||
drop table t1;
|
||||
#
|
||||
# Bug mdev-4369: histogram for a column with many distinct values
|
||||
#
|
||||
CREATE TABLE t1 (id int);
|
||||
CREATE TABLE t2 (id int);
|
||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||
INSERT INTO t1 SELECT id+1 FROM t1;
|
||||
INSERT INTO t1 SELECT id+2 FROM t1;
|
||||
INSERT INTO t1 SELECT id+4 FROM t1;
|
||||
INSERT INTO t1 SELECT id+8 FROM t1;
|
||||
INSERT INTO t1 SELECT id+16 FROM t1;
|
||||
INSERT INTO t1 SELECT id+32 FROM t1;
|
||||
INSERT INTO t1 SELECT id+64 FROM t1;
|
||||
INSERT INTO t1 SELECT id+128 FROM t1;
|
||||
INSERT INTO t1 SELECT id+256 FROM t1;
|
||||
INSERT INTO t1 SELECT id+512 FROM t1;
|
||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||
SELECT COUNT(*) FROM t2;
|
||||
COUNT(*)
|
||||
8192
|
||||
SELECT COUNT(DISTINCT id) FROM t2;
|
||||
COUNT(DISTINCT id)
|
||||
1024
|
||||
set @@tmp_table_size=1024*16;
|
||||
set @@max_heap_table_size=1024*16;
|
||||
set histogram_size=63;
|
||||
analyze table t2 persistent for all;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t2 analyze status OK
|
||||
select db_name, table_name, column_name,
|
||||
min_value, max_value,
|
||||
nulls_ratio, avg_frequency,
|
||||
hist_size, hist_type, HEX(histogram)
|
||||
FROM mysql.column_stats;
|
||||
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
|
||||
test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB
|
||||
set histogram_size=default;
|
||||
drop table t1, t2;
|
||||
set use_stat_tables=@save_use_stat_tables;
|
||||
|
|
|
@ -638,5 +638,48 @@ set histogram_type=default;
|
|||
|
||||
drop table t1;
|
||||
|
||||
--echo #
|
||||
--echo # Bug mdev-4369: histogram for a column with many distinct values
|
||||
--echo #
|
||||
|
||||
|
||||
CREATE TABLE t1 (id int);
|
||||
CREATE TABLE t2 (id int);
|
||||
|
||||
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
|
||||
INSERT INTO t1 (id) SELECT id FROM t1;
|
||||
INSERT INTO t1 SELECT id+1 FROM t1;
|
||||
INSERT INTO t1 SELECT id+2 FROM t1;
|
||||
INSERT INTO t1 SELECT id+4 FROM t1;
|
||||
INSERT INTO t1 SELECT id+8 FROM t1;
|
||||
INSERT INTO t1 SELECT id+16 FROM t1;
|
||||
INSERT INTO t1 SELECT id+32 FROM t1;
|
||||
INSERT INTO t1 SELECT id+64 FROM t1;
|
||||
INSERT INTO t1 SELECT id+128 FROM t1;
|
||||
INSERT INTO t1 SELECT id+256 FROM t1;
|
||||
INSERT INTO t1 SELECT id+512 FROM t1;
|
||||
|
||||
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
|
||||
|
||||
SELECT COUNT(*) FROM t2;
|
||||
SELECT COUNT(DISTINCT id) FROM t2;
|
||||
|
||||
set @@tmp_table_size=1024*16;
|
||||
set @@max_heap_table_size=1024*16;
|
||||
|
||||
set histogram_size=63;
|
||||
|
||||
analyze table t2 persistent for all;
|
||||
|
||||
select db_name, table_name, column_name,
|
||||
min_value, max_value,
|
||||
nulls_ratio, avg_frequency,
|
||||
hist_size, hist_type, HEX(histogram)
|
||||
FROM mysql.column_stats;
|
||||
|
||||
set histogram_size=default;
|
||||
|
||||
drop table t1, t2;
|
||||
|
||||
set use_stat_tables=@save_use_stat_tables;
|
||||
|
||||
|
|
|
@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc
|
|||
uint size;
|
||||
uint full_size;
|
||||
uint min_dupl_count; /* always 0 for unions, > 0 for intersections */
|
||||
bool with_counters;
|
||||
|
||||
bool merge(TABLE *table, uchar *buff, bool without_last_merge);
|
||||
|
||||
|
|
|
@ -1348,7 +1348,7 @@ public:
|
|||
tree_key_length= field->pack_length();
|
||||
|
||||
tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
|
||||
tree_key_length, max_heap_table_size);
|
||||
tree_key_length, max_heap_table_size, 1);
|
||||
}
|
||||
|
||||
virtual ~Count_distinct_field()
|
||||
|
@ -1435,7 +1435,7 @@ public:
|
|||
|
||||
tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
|
||||
(void*) &tree_key_length,
|
||||
tree_key_length, max_heap_table_size);
|
||||
tree_key_length, max_heap_table_size, 1);
|
||||
}
|
||||
|
||||
bool add()
|
||||
|
|
|
@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
|
|||
full_size= size;
|
||||
if (min_dupl_count_arg)
|
||||
full_size+= sizeof(element_count);
|
||||
with_counters= test(min_dupl_count_arg);
|
||||
my_b_clear(&file);
|
||||
init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func,
|
||||
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
|
||||
|
@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2)
|
|||
C_MODE_END
|
||||
|
||||
|
||||
/*
  Read the element_count value stored ofs bytes past ptr and return it.
  The value is copied out with memcpy rather than read through a cast
  pointer (presumably because the counter is not guaranteed to be aligned
  inside a merged element -- confirm against the buffer layout).
*/
inline
|
||||
element_count get_counter_from_merged_element(void *ptr, uint ofs)
|
||||
{
|
||||
element_count cnt;
|
||||
memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count));
|
||||
return cnt;
|
||||
}
|
||||
|
||||
|
||||
/*
  Store cnt into the sizeof(element_count) bytes located ofs bytes past ptr.
  Counterpart of get_counter_from_merged_element(); the value is written
  with memcpy, overwriting whatever was previously at that offset.
*/
inline
|
||||
void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt)
|
||||
{
|
||||
memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
DESCRIPTION
|
||||
|
||||
|
@ -457,6 +474,8 @@ C_MODE_END
|
|||
file file with all trees dumped. Trees in the file
|
||||
must contain sorted unique values. Cache must be
|
||||
initialized in read mode.
|
||||
with_counters take into account counters for equal merged
|
||||
elements
|
||||
RETURN VALUE
|
||||
0 ok
|
||||
<> 0 error
|
||||
|
@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
|
|||
uint key_length, BUFFPEK *begin, BUFFPEK *end,
|
||||
tree_walk_action walk_action, void *walk_action_arg,
|
||||
qsort_cmp2 compare, void *compare_arg,
|
||||
IO_CACHE *file)
|
||||
IO_CACHE *file, bool with_counters)
|
||||
{
|
||||
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
|
||||
QUEUE queue;
|
||||
|
@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
|
|||
uint bytes_read; /* to hold return value of read_to_buffer */
|
||||
BUFFPEK *top;
|
||||
int res= 1;
|
||||
uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
|
||||
element_count cnt;
|
||||
/*
|
||||
Invariant: queue must contain top element from each tree, until a tree
|
||||
is not completely walked through.
|
||||
|
@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
|
|||
/* new top has been obtained; if old top is unique, apply the action */
|
||||
if (compare(compare_arg, old_key, top->key))
|
||||
{
|
||||
if (walk_action(old_key, 1, walk_action_arg))
|
||||
cnt= with_counters ?
|
||||
get_counter_from_merged_element(old_key, cnt_ofs) : 1;
|
||||
if (walk_action(old_key, cnt, walk_action_arg))
|
||||
goto end;
|
||||
}
|
||||
else if (with_counters)
|
||||
{
|
||||
cnt= get_counter_from_merged_element(top->key, cnt_ofs);
|
||||
cnt+= get_counter_from_merged_element(old_key, cnt_ofs);
|
||||
put_counter_into_merged_element(top->key, cnt_ofs, cnt);
|
||||
}
|
||||
}
|
||||
/*
|
||||
Applying walk_action to the tail of the last tree: this is safe because
|
||||
|
@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
|
|||
{
|
||||
do
|
||||
{
|
||||
if (walk_action(top->key, 1, walk_action_arg))
|
||||
|
||||
cnt= with_counters ?
|
||||
get_counter_from_merged_element(top->key, cnt_ofs) : 1;
|
||||
if (walk_action(top->key, cnt, walk_action_arg))
|
||||
goto end;
|
||||
top->key+= key_length;
|
||||
}
|
||||
|
@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
|
|||
(BUFFPEK *) file_ptrs.buffer,
|
||||
(BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
|
||||
action, walk_action_arg,
|
||||
tree.compare, tree.custom_arg, &file);
|
||||
tree.compare, tree.custom_arg, &file, with_counters);
|
||||
}
|
||||
my_free(merge_buffer);
|
||||
return res;
|
||||
|
|
Loading…
Reference in a new issue