LDML refactoring for "MDEV-9711 NO PAD collations"

- Moving detection of the MY_CS_CSSORT, MY_CS_PUREASCII, MY_CS_NONASCII
  flags of loadable collations from add_collation() in mysys.c
  to my_cset_init_8bit() and my_coll_init_simple() in ctype-simple.c.

- Adding tests that these flags are set properly for loadable collations

- Moving LDML test related *.xml files from mysql-test/std_data/
  to mysql-test/std_data/ldml/, as there will be more *.xml test files
This commit is contained in:
Alexander Barkov 2016-09-03 09:05:56 +04:00
commit 1ca595fbf7
13 changed files with 299 additions and 74 deletions

View file

@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename)
return FALSE;
}
static int
is_case_sensitive(CHARSET_INFO *cs)
{
return (cs->sort_order &&
cs->sort_order['A'] < cs->sort_order['a'] &&
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
}
void dispcset(FILE *f,CHARSET_INFO *cs)
{
uint flags= my_8bit_charset_flags_from_data(cs) |
my_8bit_collation_flags_from_data(cs);
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
!my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "");
if (cs->name)
{

View file

@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs,
return FALSE;
}
/*
Detect if a character set is 8bit,
and it is pure ascii, i.e. doesn't have
characters outside U+0000..U+007F
This functions is shared between "conf_to_src"
and dynamic charsets loader in "mysqld".
*/
static my_bool
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
{
size_t code;
if (!cs->tab_to_uni)
return 0;
for (code= 0; code < 256; code++)
{
if (cs->tab_to_uni[code] > 0x7F)
return 0;
}
return 1;
}
/*
Shared function between conf_to_src and mysys.
Check if a 8bit character set is compatible with
ascii on the range 0x00..0x7F.
*/
static my_bool
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
{
uint i;
if (!cs->tab_to_uni)
return 1;
for (i= 0; i < 128; i++)
{
if (cs->tab_to_uni[i] != i)
return 0;
}
return 1;
}
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
{
uint flags= 0;
if (my_charset_is_8bit_pure_ascii(cs))
flags|= MY_CS_PUREASCII;
if (!my_charset_is_ascii_compatible(cs))
flags|= MY_CS_NONASCII;
return flags;
}
/*
Check if case sensitive sort order: A < a < B.
We need MY_CS_FLAG for regex library, and for
case sensitivity flag for 5.0 client protocol,
to support isCaseSensitive() method in JDBC driver
*/
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
{
uint flags= 0;
if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] &&
cs->sort_order['a'] < cs->sort_order['B'])
flags|= MY_CS_CSSORT;
return flags;
}
static my_bool
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
cs->state|= my_8bit_charset_flags_from_data(cs);
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
static my_bool my_coll_init_simple(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
cs->state|= my_8bit_collation_flags_from_data(cs);
set_max_sort_char(cs);
return FALSE;
}

View file

@ -973,48 +973,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs)
}
/*
Detect if a character set is 8bit,
and it is pure ascii, i.e. doesn't have
characters outside U+0000..U+007F
This functions is shared between "conf_to_src"
and dynamic charsets loader in "mysqld".
*/
my_bool
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
{
size_t code;
if (!cs->tab_to_uni)
return 0;
for (code= 0; code < 256; code++)
{
if (cs->tab_to_uni[code] > 0x7F)
return 0;
}
return 1;
}
/*
Shared function between conf_to_src and mysys.
Check if a 8bit character set is compatible with
ascii on the range 0x00..0x7F.
*/
my_bool
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
{
uint i;
if (!cs->tab_to_uni)
return 1;
for (i= 0; i < 128; i++)
{
if (cs->tab_to_uni[i] != i)
return 0;
}
return 1;
}
/*
Convert a string between two character sets.
'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes.

View file

@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
return (end);
}
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
/* Macros for hashing characters */
#define MY_HASH_ADD(A, B, value) \