Fix Bug#30423 "InnoDBs treatment of NULL in index stats causes bad

"rows examined" estimates". This change implements "innodb_stats_method"
with options of "nulls_equal", "nulls_unequal" and "null_ignored".
      
rb://553 approved by Marko
This commit is contained in:
Jimmy Yang 2011-01-14 09:02:28 -08:00
commit 9cd4d49840
25 changed files with 1157 additions and 77 deletions

View file

@ -66,6 +66,13 @@ this many index pages */
/*--------------------------------------*/
#define BTR_BLOB_HDR_SIZE 8
/* Estimated table level stats from sampled value. */
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty) \
((value * (ib_longlong) index->stat_n_leaf_pages \
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size \
+ not_empty) \
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size))
/***********************************************************************
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
@ -2834,10 +2841,55 @@ btr_estimate_n_rows_in_range(
}
}
/***********************************************************************
Record the number of non_null key values in a given index for
each n-column prefix of the index where n < dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
index->stat_n_non_null_key_vals. */
static
void
btr_record_not_null_field_in_rec(
/*=============================*/
rec_t* rec, /* in: physical record */
ulint n_unique, /* in: dict_index_get_n_unique(index),
number of columns uniquely determine
an index entry */
const ulint* offsets, /* in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
ib_longlong* n_not_null) /* in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
ut_ad(rec_offs_n_fields(offsets) >= n_unique);
if (n_not_null == NULL) {
return;
}
for (i = 0; i < n_unique; i++) {
ulint rec_len;
byte* field;
field = rec_get_nth_field(rec, offsets, i, &rec_len);
if (rec_len != UNIV_SQL_NULL) {
n_not_null[i]++;
} else {
/* Break if we hit the first NULL value */
break;
}
}
}
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is "nulls_ignored", we also record the number of
non-null values for each prefix and store the estimates in
array index->stat_n_non_null_key_vals. */
void
btr_estimate_number_of_different_key_vals(
@ -2851,6 +2903,8 @@ btr_estimate_number_of_different_key_vals(
ulint matched_fields;
ulint matched_bytes;
ib_longlong* n_diff;
ib_longlong* n_not_null;
ibool stats_null_not_equal;
ulint not_empty_flag = 0;
ulint total_external_size = 0;
ulint i;
@ -2858,24 +2912,47 @@ btr_estimate_number_of_different_key_vals(
ulint add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
ulint* offsets_rec = offsets_rec_;
ulint* offsets_next_rec= offsets_next_rec_;
*offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
*offsets_next_rec_
= (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
ulint* offsets_rec = NULL;
ulint* offsets_next_rec = NULL;
n_cols = dict_index_get_n_unique(index);
n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
* (n_cols + 1)
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong));
n_not_null = NULL;
/* Check srv_innodb_stats_method setting, and decide whether we
need to record non-null value and also decide if NULL is
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
* sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
/* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
case, we will treat NULLs as unequal value */
stats_null_not_equal = TRUE;
break;
case SRV_STATS_NULLS_EQUAL:
stats_null_not_equal = FALSE;
break;
default:
ut_error;
}
/* We sample some pages in the index to get an estimate */
for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
rec_t* supremum;
mtr_start(&mtr);
btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
@ -2888,18 +2965,22 @@ btr_estimate_number_of_different_key_vals(
page = btr_cur_get_page(&cursor);
supremum = page_get_supremum_rec(page);
rec = page_rec_get_next(page_get_infimum_rec(page));
if (rec != supremum) {
if (!page_rec_is_supremum(rec)) {
not_empty_flag = 1;
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
if (n_not_null) {
btr_record_not_null_field_in_rec(
rec, n_cols, offsets_rec, n_not_null);
}
}
while (rec != supremum) {
while (!page_rec_is_supremum(rec)) {
rec_t* next_rec = page_rec_get_next(rec);
if (next_rec == supremum) {
if (page_rec_is_supremum(next_rec)) {
break;
}
@ -2911,7 +2992,8 @@ btr_estimate_number_of_different_key_vals(
cmp_rec_rec_with_match(rec, next_rec,
offsets_rec, offsets_next_rec,
index, &matched_fields,
index, stats_null_not_equal,
&matched_fields,
&matched_bytes);
for (j = matched_fields + 1; j <= n_cols; j++) {
@ -2921,6 +3003,12 @@ btr_estimate_number_of_different_key_vals(
n_diff[j]++;
}
if (n_not_null) {
btr_record_not_null_field_in_rec(
next_rec, n_cols, offsets_next_rec,
n_not_null);
}
total_external_size
+= btr_rec_get_externally_stored_len(
rec, offsets_rec);
@ -2971,14 +3059,8 @@ btr_estimate_number_of_different_key_vals(
included in index->stat_n_leaf_pages) */
for (j = 0; j <= n_cols; j++) {
index->stat_n_diff_key_vals[j]
= ((n_diff[j]
* (ib_longlong)index->stat_n_leaf_pages
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
+ total_external_size
+ not_empty_flag)
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES
+ total_external_size));
index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, total_external_size, not_empty_flag);
/* If the tree is small, smaller than
10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
@ -2997,12 +3079,20 @@ btr_estimate_number_of_different_key_vals(
}
index->stat_n_diff_key_vals[j] += add_on;
/* Update the stat_n_non_null_key_vals[] with our
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
if (n_not_null != NULL && (j < n_cols)) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index,
total_external_size, not_empty_flag);
}
}
mem_free(n_diff);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
mem_heap_free(heap);
}
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/

View file

@ -1358,6 +1358,12 @@ dict_index_add_to_cache(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(ib_longlong));
new_index->stat_n_non_null_key_vals = mem_heap_zalloc(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(*new_index->stat_n_non_null_key_vals));
/* Give some sensible values to stat_n_... in case we do
not calculate statistics quickly enough */
@ -3817,6 +3823,10 @@ dict_update_statistics_low(
for (i = dict_index_get_n_unique(index); i; ) {
index->stat_n_diff_key_vals[i--] = 1;
}
memset(index->stat_n_non_null_key_vals, 0,
(1 + dict_index_get_n_unique(index))
* sizeof(*index->stat_n_non_null_key_vals));
}
index = dict_table_get_next_index(index);

View file

@ -130,6 +130,25 @@ static my_bool innobase_adaptive_hash_index = TRUE;
static char* internal_innobase_data_file_path = NULL;
/* Possible values for system variable "innodb_stats_method". The values
are defined the same as its corresponding MyISAM system variable
"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
static const char* innodb_stats_method_names[] = {
"nulls_equal",
"nulls_unequal",
"nulls_ignored",
NullS
};
/* Used to define an enumerate type of the system variable innodb_stats_method.
This is the same as "myisam_stats_method_typelib" */
static TYPELIB innodb_stats_method_typelib = {
array_elements(innodb_stats_method_names) - 1,
"innodb_stats_method_typelib",
innodb_stats_method_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
@ -6362,6 +6381,65 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
/*************************************************************************
Calculate Record Per Key value. Need to exclude the NULL value if
innodb_stats_method is set to "nulls_ignored" */
static
ha_rows
innodb_rec_per_key(
/*===============*/
/* out: estimated record per key
value */
dict_index_t* index, /* in: dict_index_t structure */
ulint i, /* in: the column we are
calculating rec per key */
ha_rows records) /* in: estimated total records */
{
ha_rows rec_per_key;
ut_ad(i < dict_index_get_n_unique(index));
/* Note the stat_n_diff_key_vals[] stores the diff value with
n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */
if (index->stat_n_diff_key_vals[i + 1] == 0) {
rec_per_key = records;
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
ib_longlong num_null;
/* Number of rows with NULL value in this
field */
num_null = records - index->stat_n_non_null_key_vals[i];
/* In theory, index->stat_n_non_null_key_vals[i]
should always be less than the number of records.
Since this is statistics value, the value could
have slight discrepancy. But we will make sure
the number of null values is not a negative number. */
num_null = (num_null < 0) ? 0 : num_null;
/* If the number of NULL values is the same as or
large than that of the distinct values, we could
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
if (index->stat_n_diff_key_vals[i + 1] <= num_null) {
rec_per_key = 1;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
rec_per_key = (ha_rows)(
(records - num_null)
/ (index->stat_n_diff_key_vals[i + 1]
- num_null));
}
} else {
rec_per_key = (ha_rows)
(records / index->stat_n_diff_key_vals[i + 1]);
}
return(rec_per_key);
}
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */
@ -6568,13 +6646,8 @@ ha_innobase::info_low(
break;
}
if (index->stat_n_diff_key_vals[j + 1] == 0) {
rec_per_key = stats.records;
} else {
rec_per_key = (ha_rows)(stats.records /
index->stat_n_diff_key_vals[j + 1]);
}
rec_per_key = innodb_rec_per_key(
index, j, stats.records);
/* Since MySQL seems to favor table scans
too much over index searches, we pretend
@ -8990,6 +9063,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
AUTOINC_NO_LOCKING, 0); /* Maximum value */
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
"Specifies how InnoDB index statistics collection code should "
"treat NULLs. Possible values are NULLS_EQUAL (default), "
"NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
@ -9031,6 +9111,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_on_metadata),
MYSQL_SYSVAR(use_legacy_cardinality_algorithm),
MYSQL_SYSVAR(adaptive_hash_index),
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(status_file),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sync_spin_loops),

View file

@ -404,7 +404,10 @@ btr_estimate_n_rows_in_range(
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
void
btr_estimate_number_of_different_key_vals(

View file

@ -222,6 +222,12 @@ struct dict_index_struct{
for this index, for each n-column prefix
where n <= dict_get_n_unique(index); we
periodically calculate new estimates */
ib_longlong* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
n < dict_get_n_unique(index); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
/* approximate index size in database pages */
ulint stat_n_leaf_pages;

View file

@ -141,6 +141,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
contains the value the for current

View file

@ -72,5 +72,5 @@ cmp_rec_rec(
ulint match_b = 0;
return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
&match_f, &match_b));
FALSE, &match_f, &match_b));
}

View file

@ -91,6 +91,11 @@ extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
/* The "innodb_stats_method" setting, decides how InnoDB is going
to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
#ifdef UNIV_LOG_ARCHIVE
extern ibool srv_log_archive_on;
extern ibool srv_archive_recovery;
@ -286,6 +291,19 @@ of lower numbers are included. */
#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward
in connection with recovery */
/* Alternatives for srv_innodb_stats_method, which could be changed by
setting innodb_stats_method */
enum srv_stats_method_name_enum {
SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
equal. This is the default setting
for innodb_stats_method */
SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
NOT equal. */
SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
};
typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
/*************************************************************************
Boots Innobase server. */

View file

@ -720,6 +720,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
@ -821,9 +825,13 @@ cmp_rec_rec_with_match(
|| rec2_f_len == UNIV_SQL_NULL) {
if (rec1_f_len == rec2_f_len) {
goto next_field;
/* This is limited to stats collection,
cannot use it for regular search */
if (nulls_unequal) {
ret = -1;
} else {
goto next_field;
}
} else if (rec2_f_len == UNIV_SQL_NULL) {
/* We define the SQL null to be the

View file

@ -218,6 +218,11 @@ ulong srv_max_buf_pool_modified_pct = 90;
/* variable counts amount of data read in total (in bytes) */
ulint srv_data_read = 0;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
/* here we count the amount of data written in total (in bytes) */
ulint srv_data_written = 0;