mirror of
https://github.com/MariaDB/server.git
synced 2026-04-26 02:05:31 +02:00
Simplify caseup() and casedn() in charsets
After the MDEV-13118 fix there's no code in the server that
wants caseup/casedn to change the argument in place for simple
charsets. Let's remove this logic and always return the result in a
new string for all charsets, both simple and complex.
1. Removing the optimization that *some* character sets used in casedn()
and caseup(), which allowed (and required) changing the case in-place,
overwriting the string passed as the "src" argument.
Now all CHARSET_INFOs work in the same way:
none of them changes the source string in-place; all of them now convert
case from the source string to the destination string, leaving
the source string untouched.
2. Adding "const" qualifier to the "char *src" parameter
to caseup() and casedn().
3. Removing duplicate implementations in ctype-mb.c.
Now both caseup() and casedn() implementations for all CJK character sets
use internally the same function my_casefold_mb()
(the former my_casefold_mb_varlen()).
4. Removing the "unused" attribute from parameters of some my_case{up|dn}_xxx()
implementations, as the affected parameters are now *used* in the code.
Previously these parameters were used only in DBUG_ASSERT().
This commit is contained in:
parent
ab58493db2
commit
e2ac4098ed
9 changed files with 106 additions and 177 deletions
|
|
@ -1196,25 +1196,26 @@ my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|||
|
||||
|
||||
static size_t
|
||||
my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
||||
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((src < srcend) &&
|
||||
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
|
||||
{
|
||||
my_toupper_utf16(uni_plane, &wc);
|
||||
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
|
||||
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
@ -1243,25 +1244,26 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
|
||||
|
||||
static size_t
|
||||
my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
||||
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((src < srcend) &&
|
||||
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
|
||||
{
|
||||
my_tolower_utf16(uni_plane, &wc);
|
||||
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
|
||||
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
@ -1987,23 +1989,24 @@ my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|||
|
||||
|
||||
static size_t
|
||||
my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((src < srcend) &&
|
||||
(res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
|
||||
{
|
||||
my_toupper_utf32(uni_plane, &wc);
|
||||
if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
|
||||
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
@ -2038,22 +2041,23 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
|
||||
|
||||
static size_t
|
||||
my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
|
||||
{
|
||||
my_tolower_utf32(uni_plane,&wc);
|
||||
if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
|
||||
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
@ -2950,23 +2954,24 @@ my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|||
*wc= page[*wc & 0xFF].sort;
|
||||
}
|
||||
|
||||
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((src < srcend) &&
|
||||
(res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
|
||||
{
|
||||
my_toupper_ucs2(uni_plane, &wc);
|
||||
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
|
||||
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
@ -2995,23 +3000,24 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|||
}
|
||||
|
||||
|
||||
static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
char *dst __attribute__((unused)),
|
||||
size_t dstlen __attribute__((unused)))
|
||||
static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
|
||||
char *dst, size_t dstlen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
char *srcend= src + srclen;
|
||||
const char *srcend= src + srclen;
|
||||
char *dstend= dst + dstlen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
DBUG_ASSERT(src == dst && srclen == dstlen);
|
||||
DBUG_ASSERT(srclen <= dstlen);
|
||||
|
||||
while ((src < srcend) &&
|
||||
(res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
|
||||
{
|
||||
my_tolower_ucs2(uni_plane, &wc);
|
||||
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
|
||||
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
|
||||
break;
|
||||
src+= res;
|
||||
dst+= res;
|
||||
}
|
||||
return srclen;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue