mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-31069 Reuse duplicate char-to-weight conversion code in ctype-utf8.c and ctype-ucs2.c
Removing similar functions from ctype-utf8.c and ctype-ucs2.c:
- my_tosort_utf16()
- my_tosort_utf32()
- my_tosort_ucs2()
- my_tosort_unicode()

Adding new shared functions into ctype-unidata.h:
- my_tosort_unicode_bmp() - reused for utf8mb3, ucs2
- my_tosort_unicode() - reused for utf8mb4, utf16, utf32

For simplicity, the new version of my_tosort_unicode*() does not include the code handling the MY_CS_LOWER_SORT flag because:
- it affects performance negatively
- we don't have any collations with this flag yet anyway

(This code was most likely erroneously merged earlier from MySQL's utf8_tolower_ci.)
This commit is contained in:
parent
30b4bb4204
commit
2ad287caad
3 changed files with 39 additions and 70 deletions
|
@ -1284,22 +1284,6 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
|
|||
const char charset_name_utf16le[]= "utf16le";
|
||||
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
|
||||
|
||||
static inline void
|
||||
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
||||
{
|
||||
if (*wc <= uni_plane->maxchar)
|
||||
{
|
||||
MY_UNICASE_CHARACTER *page;
|
||||
if ((page= uni_plane->page[*wc >> 8]))
|
||||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
static size_t
|
||||
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
|
@ -1341,7 +1325,7 @@ my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
|
|||
|
||||
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
|
||||
{
|
||||
my_tosort_utf16(uni_plane, &wc);
|
||||
my_tosort_unicode(uni_plane, &wc);
|
||||
MY_HASH_ADD_16(m1, m2, wc);
|
||||
s+= res;
|
||||
}
|
||||
|
@ -2178,22 +2162,6 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static inline void
|
||||
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
||||
{
|
||||
if (*wc <= uni_plane->maxchar)
|
||||
{
|
||||
MY_UNICASE_CHARACTER *page;
|
||||
if ((page= uni_plane->page[*wc >> 8]))
|
||||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length)
|
||||
|
@ -2242,7 +2210,7 @@ my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
|
||||
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
|
||||
{
|
||||
my_tosort_utf32(uni_plane, &wc);
|
||||
my_tosort_unicode(uni_plane, &wc);
|
||||
MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
|
||||
MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
|
||||
MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF);
|
||||
|
@ -3082,14 +3050,6 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
|||
}
|
||||
|
||||
|
||||
static inline void
|
||||
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
||||
{
|
||||
MY_UNICASE_CHARACTER *page;
|
||||
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
|
||||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
|
||||
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
|
@ -3125,7 +3085,7 @@ my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
|
||||
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
|
||||
{
|
||||
my_tosort_ucs2(uni_plane, &wc);
|
||||
my_tosort_unicode_bmp(uni_plane, &wc);
|
||||
MY_HASH_ADD_16(m1, m2, wc);
|
||||
s+=res;
|
||||
}
|
||||
|
|
|
@ -36,6 +36,32 @@ static inline my_wc_t my_u300_toupper_7bit(uchar ch)
|
|||
}
|
||||
|
||||
|
||||
static inline void my_tosort_unicode_bmp(MY_UNICASE_INFO *uni_plane,
|
||||
my_wc_t *wc)
|
||||
{
|
||||
const MY_UNICASE_CHARACTER *page;
|
||||
DBUG_ASSERT(*wc <= uni_plane->maxchar);
|
||||
if ((page= uni_plane->page[*wc >> 8]))
|
||||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
|
||||
|
||||
static inline void my_tosort_unicode(MY_UNICASE_INFO *uni_plane,
|
||||
my_wc_t *wc)
|
||||
{
|
||||
if (*wc <= uni_plane->maxchar)
|
||||
{
|
||||
const MY_UNICASE_CHARACTER *page;
|
||||
if ((page= uni_plane->page[*wc >> 8]))
|
||||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
||||
{
|
||||
|
|
|
@ -4638,23 +4638,6 @@ MY_UNICASE_INFO my_unicase_unicode520=
|
|||
};
|
||||
|
||||
|
||||
static inline void
|
||||
my_tosort_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags)
|
||||
{
|
||||
if (*wc <= uni_plane->maxchar)
|
||||
{
|
||||
MY_UNICASE_CHARACTER *page;
|
||||
if ((page= uni_plane->page[*wc >> 8]))
|
||||
*wc= (flags & MY_CS_LOWER_SORT) ?
|
||||
page[*wc & 0xFF].tolower :
|
||||
page[*wc & 0xFF].sort;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static uint
|
||||
my_casefold_multiply_utf8mbx(CHARSET_INFO *cs)
|
||||
|
@ -4734,8 +4717,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
|
|||
{
|
||||
if (weights)
|
||||
{
|
||||
my_tosort_unicode(weights, &s_wc, cs->state);
|
||||
my_tosort_unicode(weights, &w_wc, cs->state);
|
||||
my_tosort_unicode(weights, &s_wc);
|
||||
my_tosort_unicode(weights, &w_wc);
|
||||
}
|
||||
if (s_wc != w_wc)
|
||||
return 1; /* No match */
|
||||
|
@ -4803,8 +4786,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
|
|||
return 1;
|
||||
if (weights)
|
||||
{
|
||||
my_tosort_unicode(weights, &s_wc, cs->state);
|
||||
my_tosort_unicode(weights, &w_wc, cs->state);
|
||||
my_tosort_unicode(weights, &s_wc);
|
||||
my_tosort_unicode(weights, &w_wc);
|
||||
}
|
||||
|
||||
if (s_wc == w_wc)
|
||||
|
@ -5242,7 +5225,7 @@ static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t
|
|||
|
||||
while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
|
||||
{
|
||||
my_tosort_unicode(uni_plane, &wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &wc);
|
||||
MY_HASH_ADD_16(m1, m2, wc);
|
||||
s+= res;
|
||||
}
|
||||
|
@ -5976,8 +5959,8 @@ static int my_strnncoll_utf8mb3_cs(CHARSET_INFO *cs,
|
|||
save_diff = ((int)s_wc) - ((int)t_wc);
|
||||
}
|
||||
|
||||
my_tosort_unicode(uni_plane, &s_wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &t_wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &s_wc);
|
||||
my_tosort_unicode(uni_plane, &t_wc);
|
||||
|
||||
if ( s_wc != t_wc )
|
||||
{
|
||||
|
@ -6018,8 +6001,8 @@ static int my_strnncollsp_utf8mb3_cs(CHARSET_INFO *cs,
|
|||
save_diff = ((int)s_wc) - ((int)t_wc);
|
||||
}
|
||||
|
||||
my_tosort_unicode(uni_plane, &s_wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &t_wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &s_wc);
|
||||
my_tosort_unicode(uni_plane, &t_wc);
|
||||
|
||||
if ( s_wc != t_wc )
|
||||
{
|
||||
|
@ -7697,7 +7680,7 @@ my_hash_sort_utf8mb4_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
|
||||
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
|
||||
{
|
||||
my_tosort_unicode(uni_plane, &wc, cs->state);
|
||||
my_tosort_unicode(uni_plane, &wc);
|
||||
MY_HASH_ADD_16(m1, m2, (uint) (wc & 0xFFFF));
|
||||
if (wc > 0xFFFF)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue