BUG#9622, stage 2, work together with fix for BUG#12232:

added "nulls_ignored" index statistics collection method for MyISAM tables.
(notification trigger: this is about BUG#9622).


include/my_base.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method:
  Added SEARCH_RETURN_B_POS flag for ha_key_cmp()
include/my_handler.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method: added ha_find_null()
include/myisam.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
myisam/mi_check.c:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method, added 
   mi_collect_stats_*(), updated update_key_parts() to deal with all 3 methods.
myisam/myisamchk.c:
  BUG#9622: Added nulls_ignored index statistics collection method for MyISAM
myisam/myisamdef.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
myisam/sort.c:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
mysql-test/r/myisam.result:
  Testcase for BUG9622
mysql-test/t/myisam.test:
  Testcase for BUG9622
mysys/my_handler.c:
  BUG#9622: ha_key_cmp() now supports new SEARCH_RETURN_B_POS flag, added ha_find_null()
sql/ha_myisam.cc:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
sql/mysqld.cc:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
This commit is contained in:
unknown 2005-10-21 06:29:17 +04:00
commit 71fdef4d10
12 changed files with 375 additions and 31 deletions

View file

@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
found_keys++;
param->record_checksum=init_checksum;
bzero((char*) &param->unique_count,sizeof(param->unique_count));
bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
if ((!(param->testflag & T_SILENT)))
printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT)
@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
if (param->testflag & T_STATISTICS)
update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
(ulonglong) info->state->records);
param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
param->notnull_count: NULL,
(ulonglong)info->state->records);
}
if (param->testflag & T_INFO)
{
@ -552,6 +557,87 @@ err:
return 1;
}
/*
"Ignore NULLs" statistics collection method: process first index tuple.
SYNOPSIS
mi_collect_stats_nonulls_first()
keyseg IN Array of key part descriptions
notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
tuples that don't contain NULLs)
key IN Key values tuple
DESCRIPTION
Process the first index tuple - find out which prefix tuples don't
contain NULLs, and update the array of notnull counters accordingly.
*/
static
void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
uchar *key)
{
uint first_null, kp;
first_null= ha_find_null(keyseg, key) - keyseg;
/*
All prefix tuples that don't include keypart_{first_null} are not-null
tuples (and all others aren't), increment counters for them.
*/
for (kp= 0; kp < first_null; kp++)
notnull[kp]++;
}
/*
"Ignore NULLs" statistics collection method: process next index tuple.
SYNOPSIS
mi_collect_stats_nonulls_next()
keyseg IN Array of key part descriptions
notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
tuples that don't contain NULLs)
prev_key IN Previous key values tuple
last_key IN Next key values tuple
DESCRIPTION
Process the next index tuple:
1. Find out which prefix tuples of last_key don't contain NULLs, and
update the array of notnull counters accordingly.
2. Find the first keypart number where the tuples are different(A), or
last_key has NULL value (B), and return it, so caller can count
number of unique tuples for each key prefix. We don't need (B) to be
counted, and that is compensated back in update_key_parts().
RETURN
1 + number of first keypart where values differ or last_key tuple has NULL
*/
static
int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
uchar *prev_key, uchar *last_key)
{
uint diffs[2];
uint first_null_seg, kp;
/* Find first keypart where values are different or either of them is NULL */
ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS,
diffs);
HA_KEYSEG *seg= keyseg + diffs[0] - 1;
/* Find first NULL in last_key */
first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
for (kp= 0; kp < first_null_seg; kp++)
notnull[kp]++;
/*
Return 1+ number of first key part where values differ. Don't care if
these were NULLs and not .... We compensate for that in
update_key_parts.
*/
return diffs[0];
}
/* Check if index is ok */
static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg,
param->notnull_count,
info->lastkey, key);
}
param->unique_count[diff_pos-1]++;
}
else
{
if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
key);
}
}
(*key_checksum)+= mi_byte_checksum((byte*) key,
key_length- info->s->rec_reflength);
@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (param->testflag & T_STATISTICS)
update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
(ulonglong) info->state->records);
param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
sort_param.notnull: NULL,(ulonglong) info->state->records);
share->state.key_map|=(ulonglong) 1 << sort_param.key;
if (sort_param.fix_datafile)
@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos= mi_collect_stats_nonulls_next(sort_param->seg,
sort_param->notnull,
sort_info->key_block->lastkey,
(uchar*)a);
}
sort_param->unique[diff_pos-1]++;
}
else
{
cmp= -1;
if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
(uchar*)a);
}
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
{
@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
SYNOPSIS
update_key_parts()
keyinfo Index information (only key->keysegs used)
keyinfo IN Index information (only key->keysegs used)
rec_per_key_part OUT Store statistics here
unique IN Array of #distinct values collected over index
run.
unique IN Array of (#distinct tuples)
notnull_tuples IN Array of (#tuples), or NULL
records Number of records in the table
NOTES
This function handles all 3 index statistics collection methods.
Unique is an array:
unique[0]= (#different values of {keypart1}) - 1
unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1
unique[1]= (#different values of {keypart1,keypart2} tuple) - unique[0] - 1
...
For MI_STATS_METHOD_IGNORE_NULLS notnull_tuples is an array too:
notnull_tuples[0]= (# of {keypart1} tuples such that keypart1 is not NULL)
notnull_tuples[1]= (# of {keypart1,keypart2} tuples such that all
keypart{i} are not NULL)
...
For all other statistics collection methods notnull_tuples=NULL.
The 'unique' array is collected in one sequential scan through the entire
index. This is done in two places: in chk_index() and in sort_key_write().
Statistics collection may consider NULLs as either equal or unequal (see
SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
notnull_tuples, if present, is collected during the same index scan.
Output is an array:
rec_per_key_part[k] =
@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
index tuples}
= #tuples-in-the-index / #distinct-tuples-in-the-index.
The #tuples-in-the-index and #distinct-tuples-in-the-index have different
meaning depending on which statistics collection method is used:
MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
NULLS_EQUAL NULL == NULL all tuples in table
NULLS_NOT_EQUAL NULL != NULL all tuples in table
IGNORE_NULLS n/a tuples that don't have NULLs
*/
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong records)
ulonglong *unique, ulonglong *notnull,
ulonglong records)
{
ulonglong count=0,tmp;
ulonglong count=0,tmp, unique_tuples;
ulonglong tuples= records;
uint parts;
for (parts=0 ; parts < keyinfo->keysegs ; parts++)
{
count+=unique[parts];
if (count == 0)
tmp=records;
unique_tuples= count + 1;
if (notnull)
{
tuples= notnull[parts];
/*
#(unique_tuples not counting tuples with NULLs) =
#(unique_tuples counting tuples with NULLs as different) -
#(tuples with NULLs)
*/
unique_tuples -= (records - notnull[parts]);
}
if (unique_tuples == 0)
tmp= 1;
else if (count == 0)
tmp= tuples; /* 1 unique tuple */
else
tmp= (records + (count+1)/2) / (count+1);
/* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
let's ensure it is not */
tmp= (tuples + unique_tuples/2) / unique_tuples;
/*
for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
let's ensure it is not
*/
set_if_bigger(tmp,1);
if (tmp >= (ulonglong) ~(ulong) 0)
tmp=(ulonglong) ~(ulong) 0;
*rec_per_key_part=(ulong) tmp;
rec_per_key_part++;
}

View file

@ -339,7 +339,8 @@ static struct my_option my_long_options[] =
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"stats_method", OPT_STATS_METHOD,
"Specifies how index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).",
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
"\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
@ -451,6 +452,10 @@ static void usage(void)
-a, --analyze Analyze distribution of keys. Will make some joins in\n\
MySQL faster. You can check the calculated distribution\n\
by using '--description --verbose table_name'.\n\
--stats_method=name Specifies how index statistics collection code should\n\
threat NULLs. Possible values of name are \"nulls_unequal\"\n\
(default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
\"nulls_ignored\".\n\
-d, --description Prints some information about table.\n\
-A, --set-auto-increment[=value]\n\
Force auto_increment to start at this or higher value\n\
@ -472,7 +477,7 @@ static void usage(void)
#include <help_end.h>
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
NullS};
"nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL};
@ -699,14 +704,25 @@ get_one_option(int optid,
case OPT_STATS_METHOD:
{
int method;
enum_mi_stats_method method_conv;
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1);
}
check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL :
MI_STATS_METHOD_NULLS_NOT_EQUAL;
switch (method-1) {
case 0:
method_conv= MI_STATS_METHOD_NULLS_EQUAL;
break;
case 1:
method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
break;
case 2:
method_conv= MI_STATS_METHOD_IGNORE_NULLS;
break;
}
check_param.stats_method= method_conv;
break;
}
#ifdef DEBUG /* Only useful if debugging */

View file

@ -297,7 +297,14 @@ typedef struct st_mi_sort_param
pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek;
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
ulonglong unique[MI_MAX_KEY_SEG+1];
ulonglong notnull[MI_MAX_KEY_SEG+1];
my_off_t pos,max_pos,filepos,start_recpos;
uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length;

View file

@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
{
share->state.key_map|=(ulonglong) 1 << sinfo->key;
if (param->testflag & T_STATISTICS)
update_key_parts(sinfo->keyinfo, rec_per_key_part,
sinfo->unique, (ulonglong) info->state->records);
update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
sinfo->notnull: NULL,
(ulonglong) info->state->records);
if (!sinfo->buffpek.elements)
{
if (param->testflag & T_VERBOSE)