mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
Refactoring for MDEV-27042 and MDEV-27009
This patch prepares the code for upcoming changes: MDEV-27009 Add UCA-14.0.0 collations MDEV-27042 UCA: Resetting contractions to ignorable does not work well 1. Adding "const" qualifiers to return type and parameters in functions: - my_uca_contraction2_weight() - my_wmemcmp() - my_uca_contraction_weight() - my_uca_scanner_contraction_find() - my_uca_previous_context_find() - my_uca_context_weight_find() 2. Adding a helper function my_uca_true_contraction_eq() 3. Changing how scanner->wbeg is set during context weight handling. It was previously set inside functions: - my_uca_scanner_contraction_find() - my_uca_previous_context_find() Now it's set inside scanner_next(), which makes the code more symmetric for context-free and context-dependent sequences. This makes the upcoming fix for MDEV-27042 simpler.
This commit is contained in:
parent
86891b8538
commit
0a3d1d106a
3 changed files with 59 additions and 39 deletions
|
@ -135,8 +135,8 @@ typedef struct my_contraction_list_t
|
|||
|
||||
my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc);
|
||||
my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
|
||||
uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
|
||||
my_wc_t wc1, my_wc_t wc2);
|
||||
const uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
|
||||
my_wc_t wc1, my_wc_t wc2);
|
||||
|
||||
|
||||
/* Collation weights on a single level (e.g. primary, secondary, tertiary) */
|
||||
|
|
|
@ -31358,7 +31358,7 @@ my_uca_can_be_contraction_part(const MY_CONTRACTIONS *c, my_wc_t wc, int flag)
|
|||
@retval ptr - contraction weight array
|
||||
*/
|
||||
|
||||
uint16 *
|
||||
const uint16 *
|
||||
my_uca_contraction2_weight(const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
|
||||
{
|
||||
MY_CONTRACTION *c, *last;
|
||||
|
@ -31443,13 +31443,29 @@ my_uca_needs_context_handling(const MY_UCA_WEIGHT_LEVEL *level, my_wc_t wc)
|
|||
@retval non-zero - strings are different
|
||||
*/
|
||||
|
||||
static int
|
||||
my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
|
||||
static inline int
|
||||
my_wmemcmp(const my_wc_t *a, const my_wc_t *b, size_t len)
|
||||
{
|
||||
return memcmp(a, b, len * sizeof(my_wc_t));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Test if the MY_CONTRACTION instance is equal to the wide
|
||||
string with the given length.
|
||||
Note, only true contractions are checked,
|
||||
while previous context pairs always return FALSE.
|
||||
*/
|
||||
static inline my_bool
|
||||
my_uca_true_contraction_eq(const MY_CONTRACTION *c,
|
||||
const my_wc_t *wc, size_t len)
|
||||
{
|
||||
return (len >= MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) &&
|
||||
!c->with_context &&
|
||||
!my_wmemcmp(c->ch, wc, len);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Check if a string is a contraction,
|
||||
and return its weight array on success.
|
||||
|
@ -31463,7 +31479,7 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
|
|||
@retval ptr - contraction weight array
|
||||
*/
|
||||
|
||||
static inline uint16 *
|
||||
static inline const uint16 *
|
||||
my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
|
||||
{
|
||||
MY_CONTRACTION *c, *last;
|
||||
|
@ -31471,9 +31487,7 @@ my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
|
|||
|
||||
for (c= list->item, last= c + list->nitems; c < last; c++)
|
||||
{
|
||||
if ((len >= MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) &&
|
||||
!c->with_context &&
|
||||
!my_wmemcmp(c->ch, wc, len))
|
||||
if (my_uca_true_contraction_eq(c, wc, len))
|
||||
return c->weight;
|
||||
}
|
||||
return NULL;
|
||||
|
@ -31495,12 +31509,15 @@ my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
|
|||
@retval ptr - contraction weight array
|
||||
*/
|
||||
|
||||
static uint16 *
|
||||
my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
|
||||
static const uint16 *
|
||||
my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t currwc)
|
||||
{
|
||||
size_t clen= 1;
|
||||
int flag;
|
||||
const uchar *s, *beg[MY_UCA_MAX_CONTRACTION];
|
||||
my_wc_t wc[MY_UCA_MAX_CONTRACTION];
|
||||
wc[0]= currwc;
|
||||
|
||||
memset((void*) beg, 0, sizeof(beg));
|
||||
|
||||
/* Scan all contraction candidates */
|
||||
|
@ -31520,13 +31537,12 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
|
|||
/* Find among candidates the longest real contraction */
|
||||
for ( ; clen > 1; clen--)
|
||||
{
|
||||
uint16 *cweight;
|
||||
const uint16 *cweight;
|
||||
if (my_uca_can_be_contraction_tail(&scanner->level->contractions,
|
||||
wc[clen - 1]) &&
|
||||
(cweight= my_uca_contraction_weight(&scanner->level->contractions,
|
||||
wc, clen)))
|
||||
{
|
||||
scanner->wbeg= cweight + 1;
|
||||
scanner->sbeg= beg[clen - 1];
|
||||
return cweight;
|
||||
}
|
||||
|
@ -31549,19 +31565,15 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
|
|||
@retval ptr - contraction weight array
|
||||
*/
|
||||
|
||||
static uint16 *
|
||||
my_uca_previous_context_find(my_uca_scanner *scanner,
|
||||
static const uint16 *
|
||||
my_uca_previous_context_find(const MY_CONTRACTIONS *list,
|
||||
my_wc_t wc0, my_wc_t wc1)
|
||||
{
|
||||
const MY_CONTRACTIONS *list= &scanner->level->contractions;
|
||||
MY_CONTRACTION *c, *last;
|
||||
for (c= list->item, last= c + list->nitems; c < last; c++)
|
||||
{
|
||||
if (c->with_context && wc0 == c->ch[0] && wc1 == c->ch[1])
|
||||
{
|
||||
scanner->wbeg= c->weight + 1;
|
||||
return c->weight;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
@ -31584,10 +31596,11 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
|
|||
@retval NULL if could not find any contextual weights for wc[0]
|
||||
@retval non null pointer to a zero-terminated weight string otherwise
|
||||
*/
|
||||
static inline uint16 *
|
||||
my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
|
||||
static inline const uint16 *
|
||||
my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc)
|
||||
{
|
||||
uint16 *cweight;
|
||||
const uint16 *cweight;
|
||||
my_wc_t prevwc;
|
||||
DBUG_ASSERT(scanner->level->contractions.nitems);
|
||||
/*
|
||||
If we have scanned a character which can have previous context,
|
||||
|
@ -31599,21 +31612,22 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
|
|||
context at the moment. CLDR does not have longer sequences.
|
||||
*/
|
||||
if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
|
||||
wc[0]) &&
|
||||
currwc) &&
|
||||
scanner->wbeg != nochar && /* if not the very first character */
|
||||
my_uca_can_be_previous_context_head(&scanner->level->contractions,
|
||||
(wc[1]= ((scanner->page << 8) +
|
||||
(prevwc= ((scanner->page << 8) +
|
||||
scanner->code))) &&
|
||||
(cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
|
||||
(cweight= my_uca_previous_context_find(&scanner->level->contractions,
|
||||
prevwc, currwc)))
|
||||
{
|
||||
scanner->page= scanner->code= 0; /* Clear for the next character */
|
||||
return cweight;
|
||||
}
|
||||
else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
|
||||
wc[0]))
|
||||
currwc))
|
||||
{
|
||||
/* Check if w[0] starts a contraction */
|
||||
if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
|
||||
if ((cweight= my_uca_scanner_contraction_find(scanner, currwc)))
|
||||
return cweight;
|
||||
}
|
||||
return NULL;
|
||||
|
|
|
@ -52,28 +52,31 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
|
|||
do
|
||||
{
|
||||
const uint16 *wpage;
|
||||
my_wc_t wc[MY_UCA_MAX_CONTRACTION];
|
||||
int mblen;
|
||||
my_wc_t currwc;
|
||||
|
||||
/* Get next character */
|
||||
#if MY_UCA_ASCII_OPTIMIZE
|
||||
/* Get next ASCII character */
|
||||
if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
|
||||
{
|
||||
wc[0]= scanner->sbeg[0];
|
||||
currwc= scanner->sbeg[0];
|
||||
scanner->sbeg+= 1;
|
||||
|
||||
#if MY_UCA_COMPILE_CONTRACTIONS
|
||||
if (my_uca_needs_context_handling(scanner->level, wc[0]))
|
||||
if (my_uca_needs_context_handling(scanner->level, currwc))
|
||||
{
|
||||
uint16 *cweight= my_uca_context_weight_find(scanner, wc);
|
||||
const uint16 *cweight= my_uca_context_weight_find(scanner, currwc);
|
||||
if (cweight)
|
||||
{
|
||||
scanner->wbeg= cweight + 1;
|
||||
return *cweight;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
scanner->page= 0;
|
||||
scanner->code= (int) wc[0];
|
||||
scanner->code= (int) currwc;
|
||||
scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
|
||||
if (scanner->wbeg[0])
|
||||
return *scanner->wbeg++;
|
||||
|
@ -82,8 +85,8 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
|
|||
else
|
||||
#endif
|
||||
/* Get next MB character */
|
||||
if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
|
||||
scanner->send)) <= 0))
|
||||
if (((mblen= MY_MB_WC(scanner, &currwc, scanner->sbeg,
|
||||
scanner->send)) <= 0))
|
||||
{
|
||||
if (scanner->sbeg >= scanner->send)
|
||||
return -1; /* No more bytes, end of line reached */
|
||||
|
@ -105,7 +108,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
|
|||
}
|
||||
|
||||
scanner->sbeg+= mblen;
|
||||
if (wc[0] > scanner->level->maxchar)
|
||||
if (currwc > scanner->level->maxchar)
|
||||
{
|
||||
/* Return 0xFFFD as weight for all characters outside BMP */
|
||||
scanner->wbeg= nochar;
|
||||
|
@ -113,17 +116,20 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
|
|||
}
|
||||
|
||||
#if MY_UCA_COMPILE_CONTRACTIONS
|
||||
if (my_uca_needs_context_handling(scanner->level, wc[0]))
|
||||
if (my_uca_needs_context_handling(scanner->level, currwc))
|
||||
{
|
||||
uint16 *cweight= my_uca_context_weight_find(scanner, wc);
|
||||
const uint16 *cweight= my_uca_context_weight_find(scanner, currwc);
|
||||
if (cweight)
|
||||
{
|
||||
scanner->wbeg= cweight + 1;
|
||||
return *cweight;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Process single character */
|
||||
scanner->page= wc[0] >> 8;
|
||||
scanner->code= wc[0] & 0xFF;
|
||||
scanner->page= currwc >> 8;
|
||||
scanner->code= currwc & 0xFF;
|
||||
|
||||
/* If weight page for w[0] does not exist, then calculate algorithmically */
|
||||
if (!(wpage= scanner->level->weights[scanner->page]))
|
||||
|
|
Loading…
Add table
Reference in a new issue