mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-27009 Add UCA-14.0.0 collations - adding version-aware implicit weight handling
Implicit weights are now handled according to the Unicode version (14.0.0 vs. earlier versions):
- Adding a new member MY_UCA_INFO::version.
- Copying logical positions and the version from "src_uca" to "new_uca" in init_weight_level().
- Adding a "const MY_UCA_INFO *" parameter to a few functions, so that they know the Unicode version and can generate implicit weights accordingly:
  - at collation initialization time, for pages which are a mixture of explicit and implicit weights
  - at comparison time, for fully implicit pages
This commit is contained in:
parent
d7ffb7c3dd
commit
6bc10f8026
2 changed files with 40 additions and 13 deletions
|
@ -168,6 +168,9 @@ typedef struct uca_info_st
|
|||
my_wc_t first_variable;
|
||||
my_wc_t last_variable;
|
||||
|
||||
/* Unicode version */
|
||||
uint version;
|
||||
|
||||
} MY_UCA_INFO;
|
||||
|
||||
|
||||
|
|
|
@ -6580,6 +6580,9 @@ MY_UCA_INFO my_uca_v400=
|
|||
|
||||
0x0009, /* first_variable */
|
||||
0x2183, /* last_variable */
|
||||
|
||||
/* Misc */
|
||||
400 /* Version */
|
||||
};
|
||||
|
||||
/******************************************************/
|
||||
|
@ -30136,6 +30139,9 @@ MY_UCA_INFO my_uca_v520_th=
|
|||
|
||||
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
||||
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
||||
|
||||
/* Misc */
|
||||
520 /* Version */
|
||||
};
|
||||
|
||||
MY_UCA_INFO my_uca_v520=
|
||||
|
@ -30183,6 +30189,9 @@ MY_UCA_INFO my_uca_v520=
|
|||
|
||||
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
||||
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
||||
|
||||
/* Misc */
|
||||
520 /* Version */
|
||||
};
|
||||
|
||||
|
||||
|
@ -31691,10 +31700,11 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc,
|
|||
/****************************************************************/
|
||||
|
||||
static inline void
|
||||
my_uca_implicit_weight_put(uint16 *to, my_wc_t code, uint level)
|
||||
my_uca_implicit_weight_put(uint16 *to, const MY_UCA_INFO *src_uca,
|
||||
my_wc_t code, uint level)
|
||||
{
|
||||
MY_UCA_IMPLICIT_WEIGHT weight;
|
||||
weight= my_uca_implicit_weight_on_level(520, code, level);
|
||||
weight= my_uca_implicit_weight_on_level(src_uca->version, code, level);
|
||||
to[0]= weight.weight[0];
|
||||
to[1]= weight.weight[1];
|
||||
to[2]= 0;
|
||||
|
@ -31718,7 +31728,8 @@ static inline int
|
|||
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
|
||||
{
|
||||
my_wc_t wc= (scanner->page << 8) + scanner->code;
|
||||
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_520_implicit_weight_primary(wc);
|
||||
uint version= scanner->cs->uca->version;
|
||||
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_implicit_weight_primary(version, wc);
|
||||
scanner->implicit[0]= weight.weight[1]; /* The second weight */
|
||||
scanner->implicit[1]= 0; /* 0 terminator */
|
||||
scanner->wbeg= scanner->implicit;
|
||||
|
@ -32509,7 +32520,7 @@ typedef struct my_coll_rules_st
|
|||
{
|
||||
uint version; /* Unicode version, e.g. 400 or 520 */
|
||||
uint strength; /* Number of levels */
|
||||
MY_UCA_INFO *uca; /* Unicode weight data */
|
||||
const MY_UCA_INFO *uca; /* Unicode weight data */
|
||||
size_t nrules; /* Number of rules in the rule array */
|
||||
size_t mrules; /* Number of allocated rules */
|
||||
MY_COLL_RULE *rule; /* Rule array */
|
||||
|
@ -33199,8 +33210,8 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
|
|||
Copies UCA weights for a given "uint" string
|
||||
to the given location.
|
||||
|
||||
@dst destination UCA weight level data
|
||||
@src_uca source UCA weight data
|
||||
@dst_uca destination UCA weight data
|
||||
@to destination address
|
||||
@to_length size of destination
|
||||
@nweights OUT number of weights put to "to"
|
||||
|
@ -33212,6 +33223,7 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
|
|||
|
||||
static my_bool
|
||||
my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
|
||||
const MY_UCA_INFO *src_uca,
|
||||
uint16 *to, size_t to_length, size_t *nweights,
|
||||
my_wc_t *str, size_t len)
|
||||
{
|
||||
|
@ -33249,7 +33261,7 @@ my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
|
|||
if (!from)
|
||||
{
|
||||
from= implicit_weights;
|
||||
my_uca_implicit_weight_put(implicit_weights, *str, dst->levelno);
|
||||
my_uca_implicit_weight_put(implicit_weights, src_uca, *str, dst->levelno);
|
||||
}
|
||||
str++;
|
||||
len--;
|
||||
|
@ -33306,6 +33318,7 @@ my_uca_copy_page(MY_CHARSET_LOADER *loader,
|
|||
static my_bool
|
||||
my_uca_generate_implicit_page(MY_CHARSET_LOADER *loader,
|
||||
MY_UCA_WEIGHT_LEVEL *dst,
|
||||
const MY_UCA_INFO *src_uca,
|
||||
uint page)
|
||||
{
|
||||
uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
|
||||
|
@ -33316,7 +33329,7 @@ my_uca_generate_implicit_page(MY_CHARSET_LOADER *loader,
|
|||
for (chc= 0 ; chc < 256; chc++)
|
||||
{
|
||||
uint16 *w= dst->weights[page] + chc * dst->lengths[page];
|
||||
my_uca_implicit_weight_put(w, (page << 8) + chc, dst->levelno);
|
||||
my_uca_implicit_weight_put(w, src_uca, (page << 8) + chc, dst->levelno);
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -33495,7 +33508,7 @@ apply_one_rule(MY_CHARSET_LOADER *loader,
|
|||
r->curr, (uint)nshift, r->with_context);
|
||||
/* Store weights of the "reset to" character */
|
||||
dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
|
||||
rc= my_char_weight_put(dst,
|
||||
rc= my_char_weight_put(dst, rules->uca,
|
||||
to, MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE, &nweights,
|
||||
r->base, nreset);
|
||||
dst->contractions.nitems++; /* Activate, now it's complete */
|
||||
|
@ -33506,7 +33519,8 @@ apply_one_rule(MY_CHARSET_LOADER *loader,
|
|||
DBUG_ASSERT(dst->weights[pagec]);
|
||||
to= my_char_weight_addr(dst, r->curr[0]);
|
||||
/* Store weights of the "reset to" character */
|
||||
rc= my_char_weight_put(dst, to, dst->lengths[pagec], &nweights, r->base, nreset);
|
||||
rc= my_char_weight_put(dst, rules->uca,
|
||||
to, dst->lengths[pagec], &nweights, r->base, nreset);
|
||||
}
|
||||
if (rc)
|
||||
{
|
||||
|
@ -33575,7 +33589,9 @@ static uint my_weight_size_on_page(const MY_UCA_WEIGHT_LEVEL *src, uint page)
|
|||
*/
|
||||
static my_bool
|
||||
my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
||||
MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src,
|
||||
MY_UCA_WEIGHT_LEVEL *dst,
|
||||
const MY_UCA_INFO *src_uca,
|
||||
const MY_UCA_WEIGHT_LEVEL *src,
|
||||
uint pageno)
|
||||
{
|
||||
DBUG_ASSERT(dst->levelno == src->levelno);
|
||||
|
@ -33590,7 +33606,7 @@ my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
|||
Generate default weights for all characters on this page
|
||||
algorithmically now, at initialization time.
|
||||
*/
|
||||
my_uca_generate_implicit_page(loader, dst, pageno);
|
||||
my_uca_generate_implicit_page(loader, dst, src_uca, pageno);
|
||||
}
|
||||
|
||||
|
||||
|
@ -33601,6 +33617,7 @@ my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
|||
static my_bool
|
||||
my_uca_generate_pages(MY_CHARSET_LOADER *loader,
|
||||
MY_UCA_WEIGHT_LEVEL *dst,
|
||||
const MY_UCA_INFO *src_uca,
|
||||
const MY_UCA_WEIGHT_LEVEL *src,
|
||||
uint npages)
|
||||
{
|
||||
|
@ -33625,7 +33642,7 @@ my_uca_generate_pages(MY_CHARSET_LOADER *loader,
|
|||
}
|
||||
|
||||
/* Found a page with some special rules. */
|
||||
if (my_uca_generate_page(loader, dst, src, page))
|
||||
if (my_uca_generate_page(loader, dst, src_uca, src, page))
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
|
@ -33691,7 +33708,7 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
|
|||
|
||||
ncontractions += (int)src->contractions.nitems;
|
||||
|
||||
if ((my_uca_generate_pages(loader, dst, src, (uint)npages)))
|
||||
if ((my_uca_generate_pages(loader, dst, rules->uca, src, (uint)npages)))
|
||||
return TRUE;
|
||||
|
||||
if (ncontractions)
|
||||
|
@ -33860,6 +33877,13 @@ create_tailoring(struct charset_info_st *cs,
|
|||
}
|
||||
cs->levels_for_order= rules.strength ? rules.strength : 1;
|
||||
|
||||
/*
|
||||
Copy logical positions, version, but don't copy levels -
|
||||
they will be initialized below.
|
||||
*/
|
||||
new_uca= *src_uca;
|
||||
bzero(&new_uca.level, sizeof(new_uca.level));
|
||||
|
||||
for (i= 0; i != cs->levels_for_order; i++)
|
||||
{
|
||||
if ((rc= (src_uca->level[i].maxchar == 0)))
|
||||
|
|
Loading…
Reference in a new issue