Fixed bug mdev-4369.

The function merge_walk() was adjusted to be able to aggregate
the counters of the merged elements. 
Before this change it was not possible to guarantee the correctness
of the counters passed to the call-back parameter walk_action.
As a result, when some elements of a Unique object were flushed to
disk the function passed to merge_walk() as the call-back parameter
could return wrong counters of elements. This could lead to building
wrong histograms.
This commit is contained in:
Igor Babaev 2013-04-06 15:36:28 -07:00
parent 1c30fb2a15
commit 10f0530b22
5 changed files with 121 additions and 6 deletions

View file

@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF
set histogram_size=default;
set histogram_type=default;
drop table t1;
#
# Bug mdev-4369: histogram for a column with many distinct values
#
CREATE TABLE t1 (id int);
CREATE TABLE t2 (id int);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t1 (id) SELECT id FROM t1;
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT COUNT(*) FROM t2;
COUNT(*)
8192
SELECT COUNT(DISTINCT id) FROM t2;
COUNT(DISTINCT id)
1024
set @@tmp_table_size=1024*16;
set @@max_heap_table_size=1024*16;
set histogram_size=63;
analyze table t2 persistent for all;
Table Op Msg_type Msg_text
test.t2 analyze status OK
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB
set histogram_size=default;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables;

View file

@ -638,5 +638,48 @@ set histogram_type=default;
drop table t1;
--echo #
--echo # Bug mdev-4369: histogram for a column with many distinct values
--echo #
CREATE TABLE t1 (id int);
CREATE TABLE t2 (id int);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t1 (id) SELECT id FROM t1;
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT COUNT(*) FROM t2;
SELECT COUNT(DISTINCT id) FROM t2;
set @@tmp_table_size=1024*16;
set @@max_heap_table_size=1024*16;
set histogram_size=63;
analyze table t2 persistent for all;
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
set histogram_size=default;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables;

View file

@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc
uint size;
uint full_size;
uint min_dupl_count; /* always 0 for unions, > 0 for intersections */
bool with_counters;
bool merge(TABLE *table, uchar *buff, bool without_last_merge);

View file

@ -1348,7 +1348,7 @@ public:
tree_key_length= field->pack_length();
tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
tree_key_length, max_heap_table_size);
tree_key_length, max_heap_table_size, 1);
}
virtual ~Count_distinct_field()
@ -1435,7 +1435,7 @@ public:
tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
(void*) &tree_key_length,
tree_key_length, max_heap_table_size);
tree_key_length, max_heap_table_size, 1);
}
bool add()

View file

@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
full_size= size;
if (min_dupl_count_arg)
full_size+= sizeof(element_count);
with_counters= test(min_dupl_count_arg);
my_b_clear(&file);
init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2)
C_MODE_END
/*
  Read the element counter stored inside a merged element.

  ptr  start of the merged element
  ofs  byte offset within the element at which the counter is stored

  The counter is copied out byte-wise with memcpy into a local
  element_count and returned by value.
*/
inline
element_count get_counter_from_merged_element(void *ptr, uint ofs)
{
  element_count counter;
  const uchar *src= static_cast<uchar *>(ptr) + ofs;
  memcpy(&counter, src, sizeof(counter));
  return counter;
}
inline
void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt)
{
memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count));
}
/*
DESCRIPTION
@ -457,6 +474,8 @@ C_MODE_END
file file with all trees dumped. Trees in the file
must contain sorted unique values. Cache must be
initialized in read mode.
with_counters take into account counters for equal merged
elements
RETURN VALUE
0 ok
<> 0 error
@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
uint key_length, BUFFPEK *begin, BUFFPEK *end,
tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg,
IO_CACHE *file)
IO_CACHE *file, bool with_counters)
{
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
QUEUE queue;
@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
uint bytes_read; /* to hold return value of read_to_buffer */
BUFFPEK *top;
int res= 1;
uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
element_count cnt;
/*
Invariant: queue must contain top element from each tree, until a tree
is not completely walked through.
@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
/* new top has been obtained; if old top is unique, apply the action */
if (compare(compare_arg, old_key, top->key))
{
if (walk_action(old_key, 1, walk_action_arg))
cnt= with_counters ?
get_counter_from_merged_element(old_key, cnt_ofs) : 1;
if (walk_action(old_key, cnt, walk_action_arg))
goto end;
}
else if (with_counters)
{
cnt= get_counter_from_merged_element(top->key, cnt_ofs);
cnt+= get_counter_from_merged_element(old_key, cnt_ofs);
put_counter_into_merged_element(top->key, cnt_ofs, cnt);
}
}
/*
Applying walk_action to the tail of the last tree: this is safe because
@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
{
do
{
if (walk_action(top->key, 1, walk_action_arg))
cnt= with_counters ?
get_counter_from_merged_element(top->key, cnt_ofs) : 1;
if (walk_action(top->key, cnt, walk_action_arg))
goto end;
top->key+= key_length;
}
@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
(BUFFPEK *) file_ptrs.buffer,
(BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg,
tree.compare, tree.custom_arg, &file);
tree.compare, tree.custom_arg, &file, with_counters);
}
my_free(merge_buffer);
return res;