Simplify caseup() and casedn() in charsets

After the MDEV-13118 fix there's no code in the server that
wants caseup/casedn to change the argument in place for simple
charsets.  Let's remove this logic and always return the result in a
new string for all charsets, both simple and complex.

1. Removing the optimization that *some* character sets used in casedn()
  and caseup(), which allowed (and required) to change the case in-place,
  overwriting the string passed as the "src" argument.
  Now all CHARSET_INFO's work in the same way:
  non of them change the source string in-place, all of them now convert
  case from the source string to the destination string, leaving
  the source string untouched.

2. Adding "const" qualifier to the "char *src" parameter
   to caseup() and casedn().

3. Removing duplicate implementations in ctype-mb.c.
  Now both caseup() and casedn() implementations for all CJK character sets
  use internally the same function my_casefold_mb()
  (the former my_casefold_mb_varlen()).

4. Removing the "unused" attribute from parameters of some my_case{up|dn}_xxx()
   implementations, as the affected parameters are now *used* in the code.
   Previously these parameters were used only in DBUG_ASSERT().
This commit is contained in:
Alexander Barkov 2018-07-19 13:02:14 +04:00
commit e2ac4098ed
9 changed files with 106 additions and 177 deletions

View file

@ -1196,25 +1196,26 @@ my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
static size_t
my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((src < srcend) &&
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
{
my_toupper_utf16(uni_plane, &wc);
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}
@ -1243,25 +1244,26 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
static size_t
my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((src < srcend) &&
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
{
my_tolower_utf16(uni_plane, &wc);
if (res != wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}
@ -1987,23 +1989,24 @@ my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
static size_t
my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((src < srcend) &&
(res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
{
my_toupper_utf32(uni_plane, &wc);
if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}
@ -2038,22 +2041,23 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
static size_t
my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
{
my_tolower_utf32(uni_plane,&wc);
if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}
@ -2950,23 +2954,24 @@ my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
*wc= page[*wc & 0xFF].sort;
}
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((src < srcend) &&
(res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
{
my_toupper_ucs2(uni_plane, &wc);
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}
@ -2995,23 +3000,24 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
}
static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
size_t dstlen __attribute__((unused)))
static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int res;
char *srcend= src + srclen;
const char *srcend= src + srclen;
char *dstend= dst + dstlen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
DBUG_ASSERT(srclen <= dstlen);
while ((src < srcend) &&
(res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
{
my_tolower_ucs2(uni_plane, &wc);
if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend))
break;
src+= res;
dst+= res;
}
return srclen;
}