mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
MDEV-27009 Add UCA-14.0.0 collations - adding version-aware implicit weight handling
Implicit weights are now handled according to the Unicode version (14.0.0 vs. earlier versions):
- Added a new member, MY_UCA_INFO::version.
- Logical positions and the version are now copied from "src_uca" to "new_uca" in init_weight_level().
- Added a "const MY_UCA_INFO *" parameter to a few functions, so that they know the Unicode version and can generate implicit weights accordingly:
  - at collation initialization time, for pages which are a mixture of explicit and implicit weights;
  - at comparison time, for fully implicit pages.
This commit is contained in:
parent
d7ffb7c3dd
commit
6bc10f8026
2 changed files with 40 additions and 13 deletions
|
@ -168,6 +168,9 @@ typedef struct uca_info_st
|
||||||
my_wc_t first_variable;
|
my_wc_t first_variable;
|
||||||
my_wc_t last_variable;
|
my_wc_t last_variable;
|
||||||
|
|
||||||
|
/* Unicode version */
|
||||||
|
uint version;
|
||||||
|
|
||||||
} MY_UCA_INFO;
|
} MY_UCA_INFO;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6580,6 +6580,9 @@ MY_UCA_INFO my_uca_v400=
|
||||||
|
|
||||||
0x0009, /* first_variable */
|
0x0009, /* first_variable */
|
||||||
0x2183, /* last_variable */
|
0x2183, /* last_variable */
|
||||||
|
|
||||||
|
/* Misc */
|
||||||
|
400 /* Version */
|
||||||
};
|
};
|
||||||
|
|
||||||
/******************************************************/
|
/******************************************************/
|
||||||
|
@ -30136,6 +30139,9 @@ MY_UCA_INFO my_uca_v520_th=
|
||||||
|
|
||||||
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
||||||
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
||||||
|
|
||||||
|
/* Misc */
|
||||||
|
520 /* Version */
|
||||||
};
|
};
|
||||||
|
|
||||||
MY_UCA_INFO my_uca_v520=
|
MY_UCA_INFO my_uca_v520=
|
||||||
|
@ -30183,6 +30189,9 @@ MY_UCA_INFO my_uca_v520=
|
||||||
|
|
||||||
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
0x0009, /* first_variable if alt=non-ignorable: p != ignore */
|
||||||
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
0x1D371, /* last_variable if alt=shifted: p,s,t == ignore */
|
||||||
|
|
||||||
|
/* Misc */
|
||||||
|
520 /* Version */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -31691,10 +31700,11 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc,
|
||||||
/****************************************************************/
|
/****************************************************************/
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
my_uca_implicit_weight_put(uint16 *to, my_wc_t code, uint level)
|
my_uca_implicit_weight_put(uint16 *to, const MY_UCA_INFO *src_uca,
|
||||||
|
my_wc_t code, uint level)
|
||||||
{
|
{
|
||||||
MY_UCA_IMPLICIT_WEIGHT weight;
|
MY_UCA_IMPLICIT_WEIGHT weight;
|
||||||
weight= my_uca_implicit_weight_on_level(520, code, level);
|
weight= my_uca_implicit_weight_on_level(src_uca->version, code, level);
|
||||||
to[0]= weight.weight[0];
|
to[0]= weight.weight[0];
|
||||||
to[1]= weight.weight[1];
|
to[1]= weight.weight[1];
|
||||||
to[2]= 0;
|
to[2]= 0;
|
||||||
|
@ -31718,7 +31728,8 @@ static inline int
|
||||||
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
|
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
|
||||||
{
|
{
|
||||||
my_wc_t wc= (scanner->page << 8) + scanner->code;
|
my_wc_t wc= (scanner->page << 8) + scanner->code;
|
||||||
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_520_implicit_weight_primary(wc);
|
uint version= scanner->cs->uca->version;
|
||||||
|
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_implicit_weight_primary(version, wc);
|
||||||
scanner->implicit[0]= weight.weight[1]; /* The second weight */
|
scanner->implicit[0]= weight.weight[1]; /* The second weight */
|
||||||
scanner->implicit[1]= 0; /* 0 terminator */
|
scanner->implicit[1]= 0; /* 0 terminator */
|
||||||
scanner->wbeg= scanner->implicit;
|
scanner->wbeg= scanner->implicit;
|
||||||
|
@ -32509,7 +32520,7 @@ typedef struct my_coll_rules_st
|
||||||
{
|
{
|
||||||
uint version; /* Unicode version, e.g. 400 or 520 */
|
uint version; /* Unicode version, e.g. 400 or 520 */
|
||||||
uint strength; /* Number of levels */
|
uint strength; /* Number of levels */
|
||||||
MY_UCA_INFO *uca; /* Unicode weight data */
|
const MY_UCA_INFO *uca; /* Unicode weight data */
|
||||||
size_t nrules; /* Number of rules in the rule array */
|
size_t nrules; /* Number of rules in the rule array */
|
||||||
size_t mrules; /* Number of allocated rules */
|
size_t mrules; /* Number of allocated rules */
|
||||||
MY_COLL_RULE *rule; /* Rule array */
|
MY_COLL_RULE *rule; /* Rule array */
|
||||||
|
@ -33199,8 +33210,8 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
|
||||||
Copies UCA weights for a given "uint" string
|
Copies UCA weights for a given "uint" string
|
||||||
to the given location.
|
to the given location.
|
||||||
|
|
||||||
|
@dst destination UCA weight level data
|
||||||
@src_uca source UCA weight data
|
@src_uca source UCA weight data
|
||||||
@dst_uca destination UCA weight data
|
|
||||||
@to destination address
|
@to destination address
|
||||||
@to_length size of destination
|
@to_length size of destination
|
||||||
@nweights OUT number of weights put to "to"
|
@nweights OUT number of weights put to "to"
|
||||||
|
@ -33212,6 +33223,7 @@ my_coll_rule_parse(MY_COLL_RULES *rules,
|
||||||
|
|
||||||
static my_bool
|
static my_bool
|
||||||
my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
|
my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
|
||||||
|
const MY_UCA_INFO *src_uca,
|
||||||
uint16 *to, size_t to_length, size_t *nweights,
|
uint16 *to, size_t to_length, size_t *nweights,
|
||||||
my_wc_t *str, size_t len)
|
my_wc_t *str, size_t len)
|
||||||
{
|
{
|
||||||
|
@ -33249,7 +33261,7 @@ my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
|
||||||
if (!from)
|
if (!from)
|
||||||
{
|
{
|
||||||
from= implicit_weights;
|
from= implicit_weights;
|
||||||
my_uca_implicit_weight_put(implicit_weights, *str, dst->levelno);
|
my_uca_implicit_weight_put(implicit_weights, src_uca, *str, dst->levelno);
|
||||||
}
|
}
|
||||||
str++;
|
str++;
|
||||||
len--;
|
len--;
|
||||||
|
@ -33306,6 +33318,7 @@ my_uca_copy_page(MY_CHARSET_LOADER *loader,
|
||||||
static my_bool
|
static my_bool
|
||||||
my_uca_generate_implicit_page(MY_CHARSET_LOADER *loader,
|
my_uca_generate_implicit_page(MY_CHARSET_LOADER *loader,
|
||||||
MY_UCA_WEIGHT_LEVEL *dst,
|
MY_UCA_WEIGHT_LEVEL *dst,
|
||||||
|
const MY_UCA_INFO *src_uca,
|
||||||
uint page)
|
uint page)
|
||||||
{
|
{
|
||||||
uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
|
uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
|
||||||
|
@ -33316,7 +33329,7 @@ my_uca_generate_implicit_page(MY_CHARSET_LOADER *loader,
|
||||||
for (chc= 0 ; chc < 256; chc++)
|
for (chc= 0 ; chc < 256; chc++)
|
||||||
{
|
{
|
||||||
uint16 *w= dst->weights[page] + chc * dst->lengths[page];
|
uint16 *w= dst->weights[page] + chc * dst->lengths[page];
|
||||||
my_uca_implicit_weight_put(w, (page << 8) + chc, dst->levelno);
|
my_uca_implicit_weight_put(w, src_uca, (page << 8) + chc, dst->levelno);
|
||||||
}
|
}
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
@ -33495,7 +33508,7 @@ apply_one_rule(MY_CHARSET_LOADER *loader,
|
||||||
r->curr, (uint)nshift, r->with_context);
|
r->curr, (uint)nshift, r->with_context);
|
||||||
/* Store weights of the "reset to" character */
|
/* Store weights of the "reset to" character */
|
||||||
dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
|
dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
|
||||||
rc= my_char_weight_put(dst,
|
rc= my_char_weight_put(dst, rules->uca,
|
||||||
to, MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE, &nweights,
|
to, MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE, &nweights,
|
||||||
r->base, nreset);
|
r->base, nreset);
|
||||||
dst->contractions.nitems++; /* Activate, now it's complete */
|
dst->contractions.nitems++; /* Activate, now it's complete */
|
||||||
|
@ -33506,7 +33519,8 @@ apply_one_rule(MY_CHARSET_LOADER *loader,
|
||||||
DBUG_ASSERT(dst->weights[pagec]);
|
DBUG_ASSERT(dst->weights[pagec]);
|
||||||
to= my_char_weight_addr(dst, r->curr[0]);
|
to= my_char_weight_addr(dst, r->curr[0]);
|
||||||
/* Store weights of the "reset to" character */
|
/* Store weights of the "reset to" character */
|
||||||
rc= my_char_weight_put(dst, to, dst->lengths[pagec], &nweights, r->base, nreset);
|
rc= my_char_weight_put(dst, rules->uca,
|
||||||
|
to, dst->lengths[pagec], &nweights, r->base, nreset);
|
||||||
}
|
}
|
||||||
if (rc)
|
if (rc)
|
||||||
{
|
{
|
||||||
|
@ -33575,7 +33589,9 @@ static uint my_weight_size_on_page(const MY_UCA_WEIGHT_LEVEL *src, uint page)
|
||||||
*/
|
*/
|
||||||
static my_bool
|
static my_bool
|
||||||
my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
||||||
MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src,
|
MY_UCA_WEIGHT_LEVEL *dst,
|
||||||
|
const MY_UCA_INFO *src_uca,
|
||||||
|
const MY_UCA_WEIGHT_LEVEL *src,
|
||||||
uint pageno)
|
uint pageno)
|
||||||
{
|
{
|
||||||
DBUG_ASSERT(dst->levelno == src->levelno);
|
DBUG_ASSERT(dst->levelno == src->levelno);
|
||||||
|
@ -33590,7 +33606,7 @@ my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
||||||
Generate default weights for all characters on this page
|
Generate default weights for all characters on this page
|
||||||
algorithmically now, at initialization time.
|
algorithmically now, at initialization time.
|
||||||
*/
|
*/
|
||||||
my_uca_generate_implicit_page(loader, dst, pageno);
|
my_uca_generate_implicit_page(loader, dst, src_uca, pageno);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -33601,6 +33617,7 @@ my_uca_generate_page(MY_CHARSET_LOADER *loader,
|
||||||
static my_bool
|
static my_bool
|
||||||
my_uca_generate_pages(MY_CHARSET_LOADER *loader,
|
my_uca_generate_pages(MY_CHARSET_LOADER *loader,
|
||||||
MY_UCA_WEIGHT_LEVEL *dst,
|
MY_UCA_WEIGHT_LEVEL *dst,
|
||||||
|
const MY_UCA_INFO *src_uca,
|
||||||
const MY_UCA_WEIGHT_LEVEL *src,
|
const MY_UCA_WEIGHT_LEVEL *src,
|
||||||
uint npages)
|
uint npages)
|
||||||
{
|
{
|
||||||
|
@ -33625,7 +33642,7 @@ my_uca_generate_pages(MY_CHARSET_LOADER *loader,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Found a page with some special rules. */
|
/* Found a page with some special rules. */
|
||||||
if (my_uca_generate_page(loader, dst, src, page))
|
if (my_uca_generate_page(loader, dst, src_uca, src, page))
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
@ -33691,7 +33708,7 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
|
||||||
|
|
||||||
ncontractions += (int)src->contractions.nitems;
|
ncontractions += (int)src->contractions.nitems;
|
||||||
|
|
||||||
if ((my_uca_generate_pages(loader, dst, src, (uint)npages)))
|
if ((my_uca_generate_pages(loader, dst, rules->uca, src, (uint)npages)))
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
if (ncontractions)
|
if (ncontractions)
|
||||||
|
@ -33860,6 +33877,13 @@ create_tailoring(struct charset_info_st *cs,
|
||||||
}
|
}
|
||||||
cs->levels_for_order= rules.strength ? rules.strength : 1;
|
cs->levels_for_order= rules.strength ? rules.strength : 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copy logical positions, version, but don't copy levels -
|
||||||
|
they will be initialized below.
|
||||||
|
*/
|
||||||
|
new_uca= *src_uca;
|
||||||
|
bzero(&new_uca.level, sizeof(new_uca.level));
|
||||||
|
|
||||||
for (i= 0; i != cs->levels_for_order; i++)
|
for (i= 0; i != cs->levels_for_order; i++)
|
||||||
{
|
{
|
||||||
if ((rc= (src_uca->level[i].maxchar == 0)))
|
if ((rc= (src_uca->level[i].maxchar == 0)))
|
||||||
|
|
Loading…
Reference in a new issue