A follow-up patch for MDEV-27266 Improve UCA collation performance for utf8mb3 and utf8mb4

Moving these members:

   CHARSET_INFO *cs;
   const MY_UCA_WEIGHT_LEVEL *level;

from my_uca_scanner to a new separate structure my_uca_scanner_param.

Rationale:

During a comparison of two strings, these members were initialized twice
(once for each string).

After the change, these members are initialized only once, inside
a shared instance of my_uca_scanner_param, and the instance is
shared between the two scanners (its const address is passed as a new parameter
to the underlying scanner functions).

This change gives a slight performance improvement (~5%).
This commit is contained in:
Alexander Barkov 2022-09-02 13:23:24 +04:00
parent e71aca8200
commit f6118acda9
3 changed files with 109 additions and 72 deletions

View file

@ -43,10 +43,12 @@ static inline
#ifdef SCANNER_NEXT_NCHARS #ifdef SCANNER_NEXT_NCHARS
weight_and_nchars_t weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner, MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner,
const my_uca_scanner_param *param,
size_t nchars) size_t nchars)
#else #else
int int
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner,
const my_uca_scanner_param *param)
#endif #endif
{ {
#ifdef SCANNER_NEXT_NCHARS #ifdef SCANNER_NEXT_NCHARS
@ -82,7 +84,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
if (scanner->sbeg + 1 < scanner->send) if (scanner->sbeg + 1 < scanner->send)
{ {
const MY_UCA_2BYTES_ITEM *ww; const MY_UCA_2BYTES_ITEM *ww;
ww= my_uca_level_booster_2bytes_item_addr_const(scanner->level->booster, ww= my_uca_level_booster_2bytes_item_addr_const(param->level->booster,
scanner->sbeg[0], scanner->sbeg[0],
scanner->sbeg[1]); scanner->sbeg[1]);
if (my_uca_2bytes_item_is_applicable(ww)) if (my_uca_2bytes_item_is_applicable(ww))
@ -126,9 +128,10 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->sbeg+= 1; scanner->sbeg+= 1;
#if MY_UCA_COMPILE_CONTRACTIONS #if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, currwc)) if (my_uca_needs_context_handling(param->level, currwc))
{ {
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc, const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, param,
currwc,
LOCAL_MAX_CONTRACTION_LENGTH); LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt) if (cnt)
{ {
@ -141,7 +144,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->page= 0; scanner->page= 0;
scanner->code= (int) currwc; scanner->code= (int) currwc;
cweight= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; cweight= param->level->weights[0] + scanner->code * param->level->lengths[0];
if ((weight= my_uca_scanner_set_weight(scanner, cweight))) if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1); SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
continue; /* Ignorable character */ continue; /* Ignorable character */
@ -149,7 +152,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
else else
#endif #endif
/* Get next MB character */ /* Get next MB character */
if (((mblen= MY_MB_WC(scanner, &currwc, scanner->sbeg, if (((mblen= MY_MB_WC(scanner, param, &currwc, scanner->sbeg,
scanner->send)) <= 0)) scanner->send)) <= 0))
{ {
if (scanner->sbeg >= scanner->send) if (scanner->sbeg >= scanner->send)
@ -161,7 +164,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
There are some more bytes left. Non-positive mb_len means that There are some more bytes left. Non-positive mb_len means that
we got an incomplete or a bad byte sequence. Consume mbminlen bytes. we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
*/ */
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) if ((scanner->sbeg+= param->cs->mbminlen) > scanner->send)
{ {
/* For safety purposes don't go beyond the string range. */ /* For safety purposes don't go beyond the string range. */
scanner->sbeg= scanner->send; scanner->sbeg= scanner->send;
@ -175,16 +178,16 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
} }
scanner->sbeg+= mblen; scanner->sbeg+= mblen;
if (currwc > scanner->level->maxchar) if (currwc > param->level->maxchar)
{ {
SCANNER_NEXT_RETURN(my_uca_scanner_set_weight_outside_maxchar(scanner), SCANNER_NEXT_RETURN(my_uca_scanner_set_weight_outside_maxchar(scanner),
ignorable_nchars + 1); ignorable_nchars + 1);
} }
#if MY_UCA_COMPILE_CONTRACTIONS #if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, currwc)) if (my_uca_needs_context_handling(param->level, currwc))
{ {
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc, const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, param, currwc,
LOCAL_MAX_CONTRACTION_LENGTH); LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt) if (cnt)
{ {
@ -200,12 +203,12 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
scanner->code= currwc & 0xFF; scanner->code= currwc & 0xFF;
/* If weight page for w[0] does not exist, then calculate algorithmically */ /* If weight page for w[0] does not exist, then calculate algorithmically */
if (!(wpage= scanner->level->weights[scanner->page])) if (!(wpage= param->level->weights[scanner->page]))
SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner), SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner, param),
ignorable_nchars + 1); ignorable_nchars + 1);
/* Calculate pointer to w[0]'s weight, using page and offset */ /* Calculate pointer to w[0]'s weight, using page and offset */
cweight= wpage + scanner->code * scanner->level->lengths[scanner->page]; cweight= wpage + scanner->code * param->level->lengths[scanner->page];
if ((weight= my_uca_scanner_set_weight(scanner, cweight))) if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1); SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
continue; /* Ignorable character */ continue; /* Ignorable character */

View file

@ -31312,6 +31312,13 @@ my_uca1400_info_tailored[MY_CS_ENCODING_LAST+1]
[MY_UCA1400_COLLATION_DEFINITION_COUNT]; [MY_UCA1400_COLLATION_DEFINITION_COUNT];
/*
  Scanner parameters that are kept outside of my_uca_scanner.
  A const pointer to this structure is passed to the scanner functions
  next to the scanner itself, so one instance can serve several scanners.
*/
typedef struct my_uca_scanner_param_st
{
/* Weight level whose data (weights, lengths, contractions) is looked up */
const MY_UCA_WEIGHT_LEVEL *level;
/* Character set, used e.g. for mbminlen and multi-byte decoding */
CHARSET_INFO *cs;
} my_uca_scanner_param;
/* /*
Unicode Collation Algorithm: Unicode Collation Algorithm:
Collation element (weight) scanner, Collation element (weight) scanner,
@ -31323,11 +31330,9 @@ typedef struct my_uca_scanner_st
const uint16 *wbeg; /* Beginning of the current weight string */ const uint16 *wbeg; /* Beginning of the current weight string */
const uchar *sbeg; /* Beginning of the input string */ const uchar *sbeg; /* Beginning of the input string */
const uchar *send; /* End of the input string */ const uchar *send; /* End of the input string */
const MY_UCA_WEIGHT_LEVEL *level;
uint16 implicit[2]; uint16 implicit[2];
int page; int page;
int code; int code;
CHARSET_INFO *cs;
} my_uca_scanner; } my_uca_scanner;
@ -31870,6 +31875,7 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
static const MY_CONTRACTION * static const MY_CONTRACTION *
my_uca_scanner_contraction_hash_find(my_uca_scanner *scanner, my_uca_scanner_contraction_hash_find(my_uca_scanner *scanner,
const my_uca_scanner_param *param,
my_wc_t currwc, my_wc_t currwc,
size_t max_char_length) size_t max_char_length)
{ {
@ -31887,10 +31893,10 @@ my_uca_scanner_contraction_hash_find(my_uca_scanner *scanner,
flag<<= 1) flag<<= 1)
{ {
int mblen; int mblen;
if ((mblen= my_ci_mb_wc(scanner->cs, &wc[clen], s, scanner->send)) <= 0) if ((mblen= my_ci_mb_wc(param->cs, &wc[clen], s, scanner->send)) <= 0)
break; break;
beg[clen]= s= s + mblen; beg[clen]= s= s + mblen;
if (!my_uca_can_be_contraction_part(&scanner->level->contractions, if (!my_uca_can_be_contraction_part(&param->level->contractions,
wc[clen++], flag)) wc[clen++], flag))
break; break;
} }
@ -31899,9 +31905,9 @@ my_uca_scanner_contraction_hash_find(my_uca_scanner *scanner,
for ( ; clen > 1; clen--) for ( ; clen > 1; clen--)
{ {
const MY_CONTRACTION *cnt; const MY_CONTRACTION *cnt;
if (my_uca_can_be_contraction_tail(&scanner->level->contractions, if (my_uca_can_be_contraction_tail(&param->level->contractions,
wc[clen - 1]) && wc[clen - 1]) &&
(cnt= my_uca_contraction_hash_find(&scanner->level->contraction_hash, (cnt= my_uca_contraction_hash_find(&param->level->contraction_hash,
wc, clen))) wc, clen)))
{ {
scanner->sbeg= beg[clen - 1]; scanner->sbeg= beg[clen - 1];
@ -31959,12 +31965,14 @@ my_uca_previous_context_find(const MY_CONTRACTIONS *list,
@retval non null pointer - the address of MY_CONTRACTION found @retval non null pointer - the address of MY_CONTRACTION found
*/ */
static inline const MY_CONTRACTION * static inline const MY_CONTRACTION *
my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc, my_uca_context_weight_find(my_uca_scanner *scanner,
const my_uca_scanner_param *param,
my_wc_t currwc,
size_t max_char_length) size_t max_char_length)
{ {
const MY_CONTRACTION *cnt; const MY_CONTRACTION *cnt;
my_wc_t prevwc; my_wc_t prevwc;
DBUG_ASSERT(scanner->level->contractions.nitems); DBUG_ASSERT(param->level->contractions.nitems);
/* /*
If we have scanned a character which can have previous context, If we have scanned a character which can have previous context,
and there were some more characters already before, and there were some more characters already before,
@ -31974,23 +31982,23 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc,
Note, we support only 2-character long sequences with previous Note, we support only 2-character long sequences with previous
context at the moment. CLDR does not have longer sequences. context at the moment. CLDR does not have longer sequences.
*/ */
if (my_uca_can_be_previous_context_tail(&scanner->level->contractions, if (my_uca_can_be_previous_context_tail(&param->level->contractions,
currwc) && currwc) &&
scanner->wbeg != nochar && /* if not the very first character */ scanner->wbeg != nochar && /* if not the very first character */
my_uca_can_be_previous_context_head(&scanner->level->contractions, my_uca_can_be_previous_context_head(&param->level->contractions,
(prevwc= ((scanner->page << 8) + (prevwc= ((scanner->page << 8) +
scanner->code))) && scanner->code))) &&
(cnt= my_uca_previous_context_find(&scanner->level->contractions, (cnt= my_uca_previous_context_find(&param->level->contractions,
prevwc, currwc))) prevwc, currwc)))
{ {
scanner->page= scanner->code= 0; /* Clear for the next character */ scanner->page= scanner->code= 0; /* Clear for the next character */
return cnt; return cnt;
} }
else if (my_uca_can_be_contraction_head(&scanner->level->contractions, else if (my_uca_can_be_contraction_head(&param->level->contractions,
currwc)) currwc))
{ {
/* Check if w[0] starts a contraction */ /* Check if w[0] starts a contraction */
if ((cnt= my_uca_scanner_contraction_hash_find(scanner, currwc, if ((cnt= my_uca_scanner_contraction_hash_find(scanner, param, currwc,
max_char_length))) max_char_length)))
return cnt; return cnt;
} }
@ -32026,10 +32034,11 @@ my_uca_implicit_weight_put(uint16 *to, const MY_UCA_INFO *src_uca,
*/ */
static inline int static inline int
my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner) my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner,
const my_uca_scanner_param *param)
{ {
my_wc_t wc= (scanner->page << 8) + scanner->code; my_wc_t wc= (scanner->page << 8) + scanner->code;
uint version= scanner->cs->uca->version; uint version= param->cs->uca->version;
MY_UCA_IMPLICIT_WEIGHT weight= my_uca_implicit_weight_primary(version, wc); MY_UCA_IMPLICIT_WEIGHT weight= my_uca_implicit_weight_primary(version, wc);
scanner->implicit[0]= weight.weight[1]; /* The second weight */ scanner->implicit[0]= weight.weight[1]; /* The second weight */
scanner->implicit[1]= 0; /* 0 terminator */ scanner->implicit[1]= 0; /* 0 terminator */
@ -32040,14 +32049,15 @@ my_uca_scanner_next_implicit_primary(my_uca_scanner *scanner)
/** /**
Return an implicit weight for the current level Return an implicit weight for the current level
(according to scanner->level->levelno). (according to param->level->levelno).
*/ */
static inline int static inline int
my_uca_scanner_next_implicit(my_uca_scanner *scanner) my_uca_scanner_next_implicit(my_uca_scanner *scanner,
const my_uca_scanner_param *param)
{ {
switch (scanner->level->levelno) { switch (param->level->levelno) {
case 0: return my_uca_scanner_next_implicit_primary(scanner);/* Primary level*/ case 0: return my_uca_scanner_next_implicit_primary(scanner, param);/* Primary level*/
case 1: scanner->wbeg= nochar; return 0x0020; /* Secondary level */ case 1: scanner->wbeg= nochar; return 0x0020; /* Secondary level */
case 2: scanner->wbeg= nochar; return 0x0002; /* Tertiary level */ case 2: scanner->wbeg= nochar; return 0x0002; /* Tertiary level */
default: scanner->wbeg= nochar; break; default: scanner->wbeg= nochar; break;
@ -32056,21 +32066,28 @@ my_uca_scanner_next_implicit(my_uca_scanner *scanner)
return 0; return 0;
} }
/*
  Fill in a scanner parameter block.

  @param param  the parameter block to initialize
  @param cs     character set to store in param->cs
  @param level  weight level to store in param->level
*/
static void
my_uca_scanner_param_init(my_uca_scanner_param *param,
                          CHARSET_INFO *cs,
                          const MY_UCA_WEIGHT_LEVEL *level)
{
  /* Members are assigned in the declaration order of my_uca_scanner_param */
  param->level= level;
  param->cs= cs;
}
/* /*
The same two functions for any character set The same two functions for any character set
*/ */
static void static void
my_uca_scanner_init_any(my_uca_scanner *scanner, my_uca_scanner_init_any(my_uca_scanner *scanner,
CHARSET_INFO *cs,
const MY_UCA_WEIGHT_LEVEL *level,
const uchar *str, size_t length) const uchar *str, size_t length)
{ {
/* Note, no needs to initialize scanner->wbeg */ /* Note, no needs to initialize scanner->wbeg */
scanner->sbeg= str; scanner->sbeg= str;
scanner->send= str + length; scanner->send= str + length;
scanner->wbeg= nochar; scanner->wbeg= nochar;
scanner->level= level;
scanner->cs= cs;
} }
@ -34648,7 +34665,7 @@ static void my_uca_handler_map(struct charset_info_st *cs,
instead of generic. instead of generic.
*/ */
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _generic #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _generic
#define MY_MB_WC(scanner, wc, beg, end) (my_ci_mb_wc(scanner->cs, wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_ci_mb_wc(param->cs, wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic #define MY_LIKE_RANGE my_like_range_generic
#define MY_UCA_ASCII_OPTIMIZE 0 #define MY_UCA_ASCII_OPTIMIZE 0
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1
@ -34813,7 +34830,7 @@ ex:
#include "ctype-ucs2.h" #include "ctype-ucs2.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _ucs2 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _ucs2
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic #define MY_LIKE_RANGE my_like_range_generic
#define MY_UCA_ASCII_OPTIMIZE 0 #define MY_UCA_ASCII_OPTIMIZE 0
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1
@ -35775,7 +35792,7 @@ my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
#include "ctype-utf8.h" #include "ctype-utf8.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb3 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb3
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb #define MY_LIKE_RANGE my_like_range_mb
#define MY_UCA_ASCII_OPTIMIZE 1 #define MY_UCA_ASCII_OPTIMIZE 1
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1
@ -35783,7 +35800,7 @@ my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
#include "ctype-uca.inl" #include "ctype-uca.inl"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb3 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb3
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb #define MY_LIKE_RANGE my_like_range_mb
#define MY_UCA_ASCII_OPTIMIZE 1 #define MY_UCA_ASCII_OPTIMIZE 1
#define MY_UCA_COMPILE_CONTRACTIONS 0 #define MY_UCA_COMPILE_CONTRACTIONS 0
@ -36780,7 +36797,7 @@ my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb4 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb4
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb #define MY_LIKE_RANGE my_like_range_mb
#define MY_UCA_ASCII_OPTIMIZE 1 #define MY_UCA_ASCII_OPTIMIZE 1
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1
@ -36788,7 +36805,7 @@ my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
#include "ctype-uca.inl" #include "ctype-uca.inl"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb4 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb4
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb #define MY_LIKE_RANGE my_like_range_mb
#define MY_UCA_ASCII_OPTIMIZE 1 #define MY_UCA_ASCII_OPTIMIZE 1
#define MY_UCA_COMPILE_CONTRACTIONS 0 #define MY_UCA_COMPILE_CONTRACTIONS 0
@ -37756,7 +37773,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
#include "ctype-utf32.h" #include "ctype-utf32.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf32 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf32
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic #define MY_LIKE_RANGE my_like_range_generic
#define MY_UCA_ASCII_OPTIMIZE 0 #define MY_UCA_ASCII_OPTIMIZE 0
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1
@ -38713,7 +38730,7 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
#include "ctype-utf16.h" #include "ctype-utf16.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf16 #define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf16
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end)) #define MY_MB_WC(scanner, param, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic #define MY_LIKE_RANGE my_like_range_generic
#define MY_UCA_ASCII_OPTIMIZE 0 #define MY_UCA_ASCII_OPTIMIZE 0
#define MY_UCA_COMPILE_CONTRACTIONS 1 #define MY_UCA_COMPILE_CONTRACTIONS 1

View file

@ -93,6 +93,7 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
{ {
my_uca_scanner sscanner; my_uca_scanner sscanner;
my_uca_scanner tscanner; my_uca_scanner tscanner;
my_uca_scanner_param param;
int s_res; int s_res;
int t_res; int t_res;
@ -104,14 +105,15 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
t+= prefix, tlen-= prefix; t+= prefix, tlen-= prefix;
} }
#endif #endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen); my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
do do
{ {
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while ( s_res == t_res && s_res >0); } while ( s_res == t_res && s_res >0);
return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res); return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
@ -211,6 +213,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
const uchar *t, size_t tlen) const uchar *t, size_t tlen)
{ {
my_uca_scanner sscanner, tscanner; my_uca_scanner sscanner, tscanner;
my_uca_scanner_param param;
int s_res, t_res; int s_res, t_res;
#if MY_UCA_ASCII_OPTIMIZE #if MY_UCA_ASCII_OPTIMIZE
@ -222,13 +225,14 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
} }
#endif #endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen); my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
do do
{ {
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while ( s_res == t_res && s_res >0); } while ( s_res == t_res && s_res >0);
if (s_res > 0 && t_res < 0) if (s_res > 0 && t_res < 0)
@ -241,7 +245,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
{ {
if (s_res != t_res) if (s_res != t_res)
return (s_res - t_res); return (s_res - t_res);
s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner, &param);
} while (s_res > 0); } while (s_res > 0);
return 0; return 0;
} }
@ -256,7 +260,7 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
{ {
if (s_res != t_res) if (s_res != t_res)
return (s_res - t_res); return (s_res - t_res);
t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner, &param);
} while (t_res > 0); } while (t_res > 0);
return 0; return 0;
} }
@ -392,6 +396,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
*/ */
static inline weight_and_nchars_t static inline weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
my_uca_scanner_param *param,
size_t nchars, size_t nchars,
uint *generated) uint *generated)
{ {
@ -399,14 +404,14 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
if (nchars > 0 || if (nchars > 0 ||
scanner->wbeg[0] /* Some weights from a previous expansion left */) scanner->wbeg[0] /* Some weights from a previous expansion left */)
{ {
if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, param,
nchars)).weight < 0) nchars)).weight < 0)
{ {
/* /*
We reached the end of the string, but the caller wants more weights. We reached the end of the string, but the caller wants more weights.
Perform space padding. Perform space padding.
*/ */
res.weight= my_space_weight(scanner->level); res.weight= my_space_weight(param->level);
res.nchars= 1; res.nchars= 1;
(*generated)++; (*generated)++;
} }
@ -420,8 +425,8 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
e.g. CONCAT(x'00','a') with nchars=1. e.g. CONCAT(x'00','a') with nchars=1.
Perform trimming. Perform trimming.
*/ */
res.weight= scanner->cs->state & MY_CS_NOPAD ? res.weight= param->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(scanner->level); 0 : my_space_weight(param->level);
res.nchars= (uint) nchars; res.nchars= (uint) nchars;
(*generated)++; (*generated)++;
} }
@ -429,8 +434,8 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
else else
{ {
/* The caller wants nchars==0. Perform trimming. */ /* The caller wants nchars==0. Perform trimming. */
res.weight= scanner->cs->state & MY_CS_NOPAD ? res.weight= param->cs->state & MY_CS_NOPAD ?
0 : my_space_weight(scanner->level); 0 : my_space_weight(param->level);
res.nchars= 0; res.nchars= 0;
(*generated)++; (*generated)++;
} }
@ -447,6 +452,7 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
{ {
my_uca_scanner sscanner; my_uca_scanner sscanner;
my_uca_scanner tscanner; my_uca_scanner tscanner;
my_uca_scanner_param param;
size_t s_nchars_left= nchars; size_t s_nchars_left= nchars;
size_t t_nchars_left= nchars; size_t t_nchars_left= nchars;
@ -462,8 +468,9 @@ TODO: strnncollsp_nchars_onelevel
#endif #endif
*/ */
my_uca_scanner_init_any(&sscanner, cs, level, s, slen); my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); my_uca_scanner_init_any(&sscanner, s, slen);
my_uca_scanner_init_any(&tscanner, t, tlen);
for ( ; ; ) for ( ; ; )
{ {
@ -472,9 +479,11 @@ TODO: strnncollsp_nchars_onelevel
uint generated= 0; uint generated= 0;
int diff; int diff;
s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left, s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, &param,
s_nchars_left,
&generated); &generated);
t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left, t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, &param,
t_nchars_left,
&generated); &generated);
if ((diff= (s_res.weight - t_res.weight))) if ((diff= (s_res.weight - t_res.weight)))
return diff; return diff;
@ -604,12 +613,14 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
{ {
int s_res; int s_res;
my_uca_scanner scanner; my_uca_scanner scanner;
my_uca_scanner_param param;
int space_weight= my_space_weight(&cs->uca->level[0]); int space_weight= my_space_weight(&cs->uca->level[0]);
register ulong m1= *nr1, m2= *nr2; register ulong m1= *nr1, m2= *nr2;
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen); my_uca_scanner_param_init(&param, cs, &cs->uca->level[0]);
my_uca_scanner_init_any(&scanner, s, slen);
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0) while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) >0)
{ {
if (s_res == space_weight) if (s_res == space_weight)
{ {
@ -618,7 +629,7 @@ MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
do do
{ {
count++; count++;
if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0) if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) <= 0)
{ {
/* Skip strings at end of string */ /* Skip strings at end of string */
goto end; goto end;
@ -658,11 +669,13 @@ MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
{ {
int s_res; int s_res;
my_uca_scanner scanner; my_uca_scanner scanner;
my_uca_scanner_param param;
register ulong m1= *nr1, m2= *nr2; register ulong m1= *nr1, m2= *nr2;
my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen); my_uca_scanner_param_init(&param, cs, &cs->uca->level[0]);
my_uca_scanner_init_any(&scanner, s, slen);
while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0) while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) >0)
{ {
/* See comment above why we can't use MY_HASH_ADD_16() */ /* See comment above why we can't use MY_HASH_ADD_16() */
MY_HASH_ADD(m1, m2, s_res >> 8); MY_HASH_ADD(m1, m2, s_res >> 8);
@ -713,6 +726,7 @@ MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
const uchar *src, size_t srclen) const uchar *src, size_t srclen)
{ {
my_uca_scanner scanner; my_uca_scanner scanner;
my_uca_scanner_param param;
int s_res; int s_res;
DBUG_ASSERT(src || !srclen); DBUG_ASSERT(src || !srclen);
@ -756,9 +770,12 @@ MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
} }
#endif #endif
my_uca_scanner_init_any(&scanner, cs, level, src, srclen); my_uca_scanner_param_init(&param, cs, level);
my_uca_scanner_init_any(&scanner, src, srclen);
for (; dst < de && *nweights && for (; dst < de && *nweights &&
(s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--) (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner, &param)) > 0 ;
(*nweights)--)
{ {
*dst++= s_res >> 8; *dst++= s_res >> 8;
if (dst < de) if (dst < de)