mirror of
https://github.com/MariaDB/server.git
synced 2026-05-11 17:40:11 +02:00
LDML refactoring for "MDEV-9711 NO PAD collations"
- Moving detection of the MY_CS_CSSORT, MY_CS_PUREASCII, MY_CS_NONASCII flags of loadable collations from add_collation() in mysys.c to my_cset_init_8bit() and my_coll_init_simple() in ctype-simple.c. - Adding tests that these flags are set properly for loadable collations - Moving LDML test related *.xml files from mysql-test/std_data/ to mysql-test/std_data/ldml/, as there will be more *.xml test files
This commit is contained in:
parent
addb38f476
commit
1ca595fbf7
13 changed files with 299 additions and 74 deletions
|
|
@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
is_case_sensitive(CHARSET_INFO *cs)
|
||||
{
|
||||
return (cs->sort_order &&
|
||||
cs->sort_order['A'] < cs->sort_order['a'] &&
|
||||
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
void dispcset(FILE *f,CHARSET_INFO *cs)
|
||||
{
|
||||
uint flags= my_8bit_charset_flags_from_data(cs) |
|
||||
my_8bit_collation_flags_from_data(cs);
|
||||
fprintf(f,"{\n");
|
||||
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
|
||||
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
|
||||
!my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
|
||||
flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
|
||||
flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
|
||||
flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "");
|
||||
|
||||
if (cs->name)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs,
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This function is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
static my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Shared function between conf_to_src and mysys.
|
||||
Check if an 8bit character set is compatible with
|
||||
ascii on the range 0x00..0x7F.
|
||||
*/
|
||||
static my_bool
|
||||
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
|
||||
{
|
||||
uint i;
|
||||
if (!cs->tab_to_uni)
|
||||
return 1;
|
||||
for (i= 0; i < 128; i++)
|
||||
{
|
||||
if (cs->tab_to_uni[i] != i)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
{
  /* MY_CS_PUREASCII: set when the pure-ascii check succeeds */
  uint pure_ascii_flag= my_charset_is_8bit_pure_ascii(cs) ? MY_CS_PUREASCII : 0;
  /* MY_CS_NONASCII: set when the ascii-compatibility check fails */
  uint non_ascii_flag= my_charset_is_ascii_compatible(cs) ? 0 : MY_CS_NONASCII;
  return pure_ascii_flag | non_ascii_flag;
}
|
||||
|
||||
|
||||
/*
|
||||
Check if case sensitive sort order: A < a < B.
|
||||
We need the MY_CS_CSSORT flag for regex library, and for
|
||||
case sensitivity flag for 5.0 client protocol,
|
||||
to support isCaseSensitive() method in JDBC driver
|
||||
*/
|
||||
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
{
  /* No sort_order table: no collation flags can be derived */
  if (!cs->sort_order)
    return 0;
  /* Anything other than 'A' < 'a' < 'B' is not a case sensitive order */
  if (cs->sort_order['A'] >= cs->sort_order['a'] ||
      cs->sort_order['a'] >= cs->sort_order['B'])
    return 0;
  return MY_CS_CSSORT;
}
|
||||
|
||||
|
||||
static my_bool
|
||||
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
|
||||
{
|
||||
cs->state|= my_8bit_charset_flags_from_data(cs);
|
||||
cs->caseup_multiply= 1;
|
||||
cs->casedn_multiply= 1;
|
||||
cs->pad_char= ' ';
|
||||
|
|
@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
|
|||
static my_bool my_coll_init_simple(struct charset_info_st *cs,
|
||||
MY_CHARSET_LOADER *loader __attribute__((unused)))
|
||||
{
|
||||
cs->state|= my_8bit_collation_flags_from_data(cs);
|
||||
set_max_sort_char(cs);
|
||||
return FALSE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -973,48 +973,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This function is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Shared function between conf_to_src and mysys.
|
||||
Check if an 8bit character set is compatible with
|
||||
ascii on the range 0x00..0x7F.
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
|
||||
{
|
||||
uint i;
|
||||
if (!cs->tab_to_uni)
|
||||
return 1;
|
||||
for (i= 0; i < 128; i++)
|
||||
{
|
||||
if (cs->tab_to_uni[i] != i)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes.
|
||||
|
|
|
|||
|
|
@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
|
|||
return (end);
|
||||
}
|
||||
|
||||
|
||||
/*
  Derive character set state flags from the data of an 8bit character set.
  Returns a bit mask that may contain MY_CS_PUREASCII and/or MY_CS_NONASCII.
*/
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
|
||||
/*
  Derive collation state flags from the sort order data of an 8bit collation.
  Returns MY_CS_CSSORT when the sort order is case sensitive (A < a < B),
  0 otherwise.
*/
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
|
||||
|
||||
|
||||
/* Macros for hashing characters */
|
||||
|
||||
#define MY_HASH_ADD(A, B, value) \
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue