MDEV-17474 Change Unicode collation implementation from "handler" to "inline" style

2025-01-15 19:42:28 +01:00 · 2018-10-16 19:10:57 +04:00 · 2018-10-16 19:10:57 +04:00 · 6eae037c4c
commit 6eae037c4c
parent fee24b1281
9 changed files with 1323 additions and 1268 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -362,7 +362,6 @@ extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
 extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
 extern MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler;
 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
 /* Some typedef to make it easy for C++ to make function pointers */
 typedef int (*my_charset_conv_mb_wc)(CHARSET_INFO *, my_wc_t *,
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
--- a/strings/ctype-uca.ic
+++ b/strings/ctype-uca.ic
@ -0,0 +1,763 @@
 /*
  Copyright (c) 2018 MariaDB Corporation
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /*
   This file is a template. The including file must define the macros
   checked below before including it; all three are #undef'ed again at the
   end of this file.

     MY_FUNCTION_NAME(x) - wraps the name of every function and handler
                           defined in this file
     MY_MB_WC            - invoked by scanner_next() as
                           MY_MB_WC(scanner, wc, sbeg, send) to decode the
                           next character of the scanned string
     MY_LIKE_RANGE       - function stored in the collation handlers
                           defined at the end of this file
 */
 #ifndef MY_FUNCTION_NAME
 #error MY_FUNCTION_NAME is not defined
 #endif
 #ifndef MY_MB_WC
 #error MY_MB_WC is not defined
 #endif
 #ifndef MY_LIKE_RANGE
 #error MY_LIKE_RANGE is not defined
 #endif
 /*
   Return the next weight of the string being scanned.

   SYNOPSIS:
     scanner_next()
     scanner     Scanner state: current position (sbeg..send), the pending
                 weight string (wbeg), the weight level, and page/code of
                 the last character processed as a single character.

   NOTES:
     Characters are decoded with MY_MB_WC, which is supplied by the
     including file.

   RETURN
     -1      - no more bytes, the end of the string has been reached
     0xFFFF  - a bad or incomplete byte sequence was found;
               mbminlen bytes (but not beyond send) have been consumed
     0xFFFD  - the character is greater than level->maxchar
     other   - the next weight of the current character or contraction
 */
 static inline int
 MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
 {
  /*
    If the pending weight string is not exhausted yet, return its next
    weight. Otherwise read the next character and point wbeg to its
    weight string.
  */
  if (scanner->wbeg[0])      /* More weights left from the previous step: */
    return *scanner->wbeg++; /* return the next weight from expansion     */
  do
  {
    const uint16 *wpage;
    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
    int mblen;
    /* Get the next character into wc[0] */
    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
                                       scanner->send)) <= 0))
    {
      if (scanner->sbeg >= scanner->send)
        return -1; /* No more bytes, end of line reached */
      /*
        There are some more bytes left. Non-positive mblen means that
        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
      */
      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
      {
        /* For safety purposes don't go beyond the string range. */
        scanner->sbeg= scanner->send;
      }
      /*
        Treat every complete or incomplete mbminlen unit as a weight which is
        greater than weight for any possible normal character.
        0xFFFF is greater than any possible weight in the UCA weight table.
      */
      return 0xFFFF;
    }
    scanner->sbeg+= mblen;
    if (wc[0] > scanner->level->maxchar)
    {
      /*
        The character is above level->maxchar (e.g. a non-BMP character
        on a level limited to the BMP): return the fixed weight 0xFFFD.
        wbeg is reset to nochar, so the previous-context test below
        is skipped for the character that follows.
      */
      scanner->wbeg= nochar;
      return 0xFFFD;
    }
    if (my_uca_have_contractions_quick(scanner->level))
    {
      uint16 *cweight;
      /*
        If we have scanned a character which can have previous context,
        and there were some more characters already before,
        then reconstruct codepoint of the previous character
        from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
        together form a real previous context pair.
        Note, we support only 2-character long sequences with previous
        context at the moment. CLDR does not have longer sequences.
      */
      if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
                                              wc[0]) &&
          scanner->wbeg != nochar &&     /* if not the very first character */
          my_uca_can_be_previous_context_head(&scanner->level->contractions,
                                              (wc[1]= ((scanner->page << 8) +
                                                        scanner->code))) &&
          (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
      {
        scanner->page= scanner->code= 0; /* Clear for the next character */
        return *cweight;
      }
      else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
                                              wc[0]))
      {
        /* Check if wc[0] starts a contraction */
        if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
          return *cweight;
      }
    }
    /*
      Process single character.
      Remember its page and code: they are used above to rebuild
      the previous character when testing for a previous-context pair.
    */
    scanner->page= wc[0] >> 8;
    scanner->code= wc[0] & 0xFF;
    /*
      If weight page for wc[0] does not exist,
      then calculate the weight algorithmically
    */
    if (!(wpage= scanner->level->weights[scanner->page]))
      return my_uca_scanner_next_implicit(scanner);
    /* Calculate pointer to wc[0]'s weight, using page and offset */
    scanner->wbeg= wpage +
                   scanner->code * scanner->level->lengths[scanner->page];
  } while (!scanner->wbeg[0]); /* Skip ignorable characters */
  return *scanner->wbeg++;
 }
 /*
  Compare two strings on one weight level, without space padding.

  SYNOPSIS:
    strnncoll_onelevel()
    cs           Character set information
    level        Weight level to compare on
    s            First string
    slen         Length of the first string, in bytes
    t            Second string
    tlen         Length of the second string, in bytes
    t_is_prefix  If TRUE, "t" is treated as a prefix: when "t" ends
                 before a difference has been found the result is 0

  NOTES:
    One scanner is run over each string and their weights are read
    pairwise, "s" first, then "t". Reading stops as soon as the two
    weights differ, or as soon as the common weight is not positive.
    scanner_next() returns -1 only when a string has ended; a bad byte
    sequence is reported as the (positive) weight 0xFFFF and therefore
    takes part in the comparison like any other weight.

  RETURN
    0               - the strings are equal ("t" may be shorter than "s"
                      when t_is_prefix is set)
    negative number - the first string is smaller
    positive number - the first string is bigger
 */
 static int
 MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
                                      const MY_UCA_WEIGHT_LEVEL *level,
                                      const uchar *s, size_t slen,
                                      const uchar *t, size_t tlen,
                                      my_bool t_is_prefix)
 {
  my_uca_scanner left;
  my_uca_scanner right;
  int lw, rw;
  my_uca_scanner_init_any(&left, cs, level, s, slen);
  my_uca_scanner_init_any(&right, cs, level, t, tlen);
  for ( ; ; )
  {
    lw= MY_FUNCTION_NAME(scanner_next)(&left);
    rw= MY_FUNCTION_NAME(scanner_next)(&right);
    if (lw != rw || lw <= 0)
      break;
  }
  /* "t" has ended and is only required to be a prefix of "s" */
  if (t_is_prefix && rw < 0)
    return 0;
  return lw - rw;
 }
 /*
  One-level comparison without space padding.
  Only the first weight level (cs->uca->level[0]) takes part;
  see strnncoll_onelevel() for the meaning of t_is_prefix and
  of the returned value.
 */
 static int
 MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
                             const uchar *s, size_t slen,
                             const uchar *t, size_t tlen,
                             my_bool t_is_prefix)
 {
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];
  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, first_level,
                                              s, slen, t, tlen,
                                              t_is_prefix);
 }
 /*
  Multi-level comparison without space padding.
  The strings are compared level by level, for the first
  cs->levels_for_order levels. The first level that yields a non-zero
  result decides; 0 is returned when all levels compare equal.
 */
 static int
 MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
                                        const uchar *s, size_t slen,
                                        const uchar *t, size_t tlen,
                                        my_bool t_is_prefix)
 {
  const uint level_count= cs->levels_for_order;
  uint lvl= 0;
  int diff= 0;
  while (diff == 0 && lvl < level_count)
  {
    diff= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[lvl],
                                               s, slen, t, tlen,
                                               t_is_prefix);
    lvl++;
  }
  return diff;
 }
 /*
  Compare two strings on one weight level, treating the shorter string
  as if it were followed by an unlimited number of spaces.

  SYNOPSIS:
    strnncollsp_onelevel()
    cs          Character set information
    level       Weight level to compare on
    s           First string
    slen        Length of the first string, in bytes
    t           Second string
    tlen        Length of the second string, in bytes

  NOTES:
    Weights of both strings are read pairwise until they differ or
    until the common weight is not positive. After that:
    - if only "t" has ended, the remaining weights of "s" are compared
      to the weight of SPACE on this level, one by one;
    - if only "s" has ended, the remaining weights of "t" are compared
      to the weight of SPACE in the same way;
    - otherwise the difference of the last two weights is the result
      (0 when both strings have ended at the same time).
    scanner_next() returns -1 only at the end of a string; a bad byte
    sequence is reported as the (positive) weight 0xFFFF.

  RETURN
    0               - the strings are equal
    negative number - the first string is smaller
    positive number - the first string is bigger
 */
 static int
 MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
                                        const MY_UCA_WEIGHT_LEVEL *level,
                                        const uchar *s, size_t slen,
                                        const uchar *t, size_t tlen)
 {
  my_uca_scanner left, right;
  int lw, rw;
  my_uca_scanner_init_any(&left, cs, level, s, slen);
  my_uca_scanner_init_any(&right, cs, level, t, tlen);
  for ( ; ; )
  {
    lw= MY_FUNCTION_NAME(scanner_next)(&left);
    rw= MY_FUNCTION_NAME(scanner_next)(&right);
    if (lw != rw || lw <= 0)
      break;
  }
  if (lw > 0 && rw < 0)
  {
    /* "t" has ended: match the rest of "s" against spaces */
    const int space= my_space_weight(level);
    for ( ; lw > 0; lw= MY_FUNCTION_NAME(scanner_next)(&left))
    {
      if (lw != space)
        return lw - space;
    }
    return 0;
  }
  if (lw < 0 && rw > 0)
  {
    /* "s" has ended: match the rest of "t" against spaces */
    const int space= my_space_weight(level);
    for ( ; rw > 0; rw= MY_FUNCTION_NAME(scanner_next)(&right))
    {
      if (space != rw)
        return space - rw;
    }
    return 0;
  }
  return lw - rw;
 }
 /*
  One-level, PAD SPACE.
  Space-padded comparison on the first weight level only;
  see strnncollsp_onelevel() for the returned value.
 */
 static int
 MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
                               const uchar *s, size_t slen,
                               const uchar *t, size_t tlen)
 {
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];
  return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, first_level,
                                                s, slen, t, tlen);
 }
 /*
  One-level, NO PAD.
  Has the strnncollsp() signature, but does not pad: the work is done
  by strnncoll_onelevel() on the first weight level, with prefix
  matching turned off.
 */
 static int
 MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
                                     const uchar *s, size_t slen,
                                     const uchar *t, size_t tlen)
 {
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];
  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, first_level,
                                              s, slen, t, tlen, FALSE);
 }
 /*
  Multi-level, PAD SPACE.
  Runs the space-padded comparison on each of the first
  cs->levels_for_order levels, in order, and returns the first
  non-zero result, or 0 when all levels compare equal.
 */
 static int
 MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
                                          const uchar *s, size_t slen,
                                          const uchar *t, size_t tlen)
 {
  const uint level_count= cs->levels_for_order;
  uint lvl;
  for (lvl= 0; lvl < level_count; lvl++)
  {
    const int diff=
      MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[lvl],
                                             s, slen, t, tlen);
    if (diff != 0)
      return diff;
  }
  return 0;
 }
 /*
  Multi-level, NO PAD.
  Runs the unpadded, non-prefix comparison on each of the first
  cs->levels_for_order levels, in order, and returns the first
  non-zero result, or 0 when all levels compare equal.
 */
 static int
 MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
                                                const uchar *s, size_t slen,
                                                const uchar *t, size_t tlen)
 {
  const uint level_count= cs->levels_for_order;
  uint lvl= 0;
  int diff= 0;
  while (diff == 0 && lvl < level_count)
  {
    diff= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[lvl],
                                               s, slen, t, tlen, FALSE);
    lvl++;
  }
  return diff;
 }
 /*
  Calculates hash value for the given string,
  according to the collation, and ignoring trailing spaces.
  SYNOPSIS:
    hash_sort()
    cs		Character set information
    s		String
    slen	String's length
    nr1		First hash parameter (in/out)
    nr2		Second hash parameter (in/out)
  NOTES:
    Scans weights consecutively and updates
    hash parameters nr1 and nr2. In a case insensitive collation,
    upper and lower case of the same letter will return the same
    weight sequence, and thus will produce the same hash values
    in nr1 and nr2.
    Weights equal to the weight of SPACE are not hashed immediately:
    they are counted, and added to the hash only if some other weight
    follows them. Spaces at the end of the string are thus not hashed.
    This function is used for one-level and for multi-level collations.
    We intentionally use only primary level in multi-level collations.
    This helps to have PARTITION BY KEY put primarily equal records
    into the same partition. E.g. in utf8_thai_520_ci records that differ
    only in tone marks go into the same partition.
  RETURN
    N/A
 */
 static void
 MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
                             const uchar *s, size_t slen,
                             ulong *nr1, ulong *nr2)
 {
  int   s_res;
  my_uca_scanner scanner;
  int space_weight= my_space_weight(&cs->uca->level[0]);
  register ulong m1= *nr1, m2= *nr2;
  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
  {
    if (s_res == space_weight)
    {
      /* Combine all spaces to be able to skip end spaces */
      uint count= 0;
      do
      {
        count++;
        if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
        {
          /* Only spaces were left: skip spaces at the end of the string */
          goto end;
        }
      }
      while (s_res == space_weight);
      /* Add back the hash for the space characters counted above */
      do
      {
        /*
          We can't use MY_HASH_ADD_16() here, because of a mistake
          in the original code, where we added the 16-bit value the
          opposite way.  Changing this would cause old partitioned tables
          to fail.
        */
        MY_HASH_ADD(m1, m2, space_weight >> 8);
        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
      }
      while (--count != 0);
    }
    /*
      Hash the current (non-space) weight, high byte first.
      See comment above why we can't use MY_HASH_ADD_16()
    */
    MY_HASH_ADD(m1, m2, s_res >> 8);
    MY_HASH_ADD(m1, m2, s_res & 0xFF);
  }
 end:
  *nr1= m1;
  *nr2= m2;
 }
 /*
  Calculates hash value for the given string according to the collation.
  Every weight of the first level is hashed, including the weights of
  trailing spaces.

  SYNOPSIS:
    hash_sort_nopad()
    cs		Character set information
    s		String
    slen	String's length
    nr1		First hash parameter (in/out)
    nr2		Second hash parameter (in/out)
 */
 static void
 MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
                                   const uchar *s, size_t slen,
                                   ulong *nr1, ulong *nr2)
 {
  my_uca_scanner scanner;
  register ulong h1= *nr1, h2= *nr2;
  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
  for ( ; ; )
  {
    int weight= MY_FUNCTION_NAME(scanner_next)(&scanner);
    if (weight <= 0)
      break;
    /*
      The weight is hashed as two separate bytes, high byte first,
      in the same way hash_sort() does it (MY_HASH_ADD_16() is
      deliberately not used there).
    */
    MY_HASH_ADD(h1, h2, weight >> 8);
    MY_HASH_ADD(h1, h2, weight & 0xFF);
  }
  *nr1= h1;
  *nr2= h2;
 }
 /*
  Write the weights of a string on one level into a "binary image"
  suitable for binary comparison, i.e. for memcmp().

  SYNOPSIS:
    strnxfrm_onelevel_internal()
    cs		Character set information
    level	Weight level to take the weights from
    dst		Where to write the image
    de		End of the space available for the image
    nweights	In: maximum number of weights to write.
		Out: decremented by the number of weights written.
    src		The source string
    srclen	Length of the source string, in bytes

  NOTES:
    Weights are scanned from the source string one by one and each of
    them is stored as two bytes, high byte first. Scanning stops when
    the destination is full, when *nweights weights have been written,
    or when the scanner returns a non-positive value. If only one byte
    of room is left, the high byte of the last weight is still written.
    It is impossible to restore the original string using its
    binary image.

  RETURN
    Pointer to the byte after the last byte written.
 */
 static uchar *
 MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
                                              MY_UCA_WEIGHT_LEVEL *level,
                                              uchar *dst, uchar *de,
                                              uint *nweights,
                                              const uchar *src, size_t srclen)
 {
  my_uca_scanner scanner;
  DBUG_ASSERT(src || !srclen);
  my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
  while (dst < de && *nweights)
  {
    int weight= MY_FUNCTION_NAME(scanner_next)(&scanner);
    if (weight <= 0)
      break;
    *dst++= weight >> 8;
    if (dst < de)
      *dst++= weight & 0xFF;
    (*nweights)--;
  }
  return dst;
 }
 /*
  Create the binary image of a string on one level, PAD SPACE variant.
  After the weights of the string have been written, and if
  MY_STRXFRM_PAD_WITH_SPACE is set in flags, the image is padded with the
  weight of SPACE for the weights that are still left in nweights
  (as far as room is available). Finally my_strxfrm_desc_and_reverse()
  is applied to the bytes written for this level.
  Returns the pointer to the byte after the last byte written.
 */
 static uchar *
 MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
                                     MY_UCA_WEIGHT_LEVEL *level,
                                     uchar *dst, uchar *de, uint nweights,
                                     const uchar *src, size_t srclen, uint flags)
 {
  uchar *level_start= dst;
  uchar *level_end;
  level_end= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
                                                          dst, de, &nweights,
                                                          src, srclen);
  DBUG_ASSERT(level_end <= de);
  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && level_end < de)
  {
    level_end= my_strnxfrm_uca_padn(level_end, de, nweights,
                                    my_space_weight(level));
  }
  DBUG_ASSERT(level_end <= de);
  my_strxfrm_desc_and_reverse(level_start, level_end, flags, 0);
  return level_end;
 }
 /*
  Create the binary image of a string on one level, NO PAD variant.
  Works like strnxfrm_onelevel(), but when MY_STRXFRM_PAD_WITH_SPACE is
  set in flags the image is padded with the minimum possible weight on
  this level instead of the weight of SPACE.
  Returns the pointer to the byte after the last byte written.
 */
 static uchar *
 MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
                                           MY_UCA_WEIGHT_LEVEL *level,
                                           uchar *dst, uchar *de, uint nweights,
                                           const uchar *src, size_t srclen,
                                           uint flags)
 {
  uchar *level_start= dst;
  uchar *level_end;
  level_end= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
                                                          dst, de, &nweights,
                                                          src, srclen);
  DBUG_ASSERT(level_end <= de);
  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && level_end < de)
  {
    /* Fill the weights that are left with the smallest weight */
    level_end= my_strnxfrm_uca_padn(level_end, de, nweights,
                                    min_weight_on_level(level));
  }
  DBUG_ASSERT(level_end <= de);
  my_strxfrm_desc_and_reverse(level_start, level_end, flags, 0);
  return level_end;
 }
 /*
  One-level, PAD SPACE.
  Creates the binary image of "src" in dst[0..dstlen) using the first
  weight level only. If MY_STRXFRM_PAD_TO_MAXLEN is set in flags, the
  rest of the destination is filled by my_strnxfrm_uca_pad() with the
  weight of SPACE.
  Returns the number of bytes written into the image.
 */
 static size_t
 MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
                            uchar *dst, size_t dstlen, uint nweights,
                            const uchar *src, size_t srclen, uint flags)
 {
  uchar *const image= dst;
  uchar *const image_end= dst + dstlen;
  uchar *pos;
  pos= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
                                           image, image_end, nweights,
                                           src, srclen, flags);
  /*
    This can probably be changed to memset(pos, 0, image_end - pos),
    like the multi-level variant of this function does.
  */
  if (pos < image_end && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
    pos= my_strnxfrm_uca_pad(pos, image_end,
                             my_space_weight(&cs->uca->level[0]));
  return pos - image;
 }
 /*
  One-level, NO PAD.
  Creates the binary image of "src" in dst[0..dstlen) using the first
  weight level only. If MY_STRXFRM_PAD_TO_MAXLEN is set in flags, the
  rest of the destination is filled with zero bytes.
  Returns the number of bytes written into the image.
 */
 static size_t
 MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
                                  uchar *dst, size_t dstlen,
                                  uint nweights,
                                  const uchar *src, size_t srclen,
                                  uint flags)
 {
  uchar *const image= dst;
  uchar *const image_end= dst + dstlen;
  uchar *pos;
  pos= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
                                                 image, image_end, nweights,
                                                 src, srclen, flags);
  if (pos < image_end && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
  {
    memset(pos, 0, image_end - pos);
    pos= image_end;
  }
  return pos - image;
 }
 /*
  Multi-level binary image, for both PAD SPACE and NO PAD collations.
  The images of the first cs->levels_for_order levels are written one
  after another into dst[0..dstlen).
  If any bit of MY_STRXFRM_LEVEL_ALL is set in flags, only the levels
  whose bit (MY_STRXFRM_LEVEL1 << level number) is set are written;
  otherwise all levels are written.
  The NO PAD or the PAD SPACE one-level function is chosen according to
  MY_CS_NOPAD in cs->state. If MY_STRXFRM_PAD_TO_MAXLEN is set in flags,
  the rest of the destination is filled with zero bytes.
  Returns the number of bytes written into the image.
 */
 static size_t
 MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs,
                                       uchar *dst, size_t dstlen,
                                       uint nweights,
                                       const uchar *src, size_t srclen,
                                       uint flags)
 {
  uchar *const image= dst;
  uchar *const image_end= dst + dstlen;
  const uint level_count= cs->levels_for_order;
  uint lvl;
  for (lvl= 0; lvl < level_count; lvl++)
  {
    /* A level mask is given, and this level is not in it */
    if ((flags & MY_STRXFRM_LEVEL_ALL) &&
        !(flags & (MY_STRXFRM_LEVEL1 << lvl)))
      continue;
    if (cs->state & MY_CS_NOPAD)
    {
      dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
                                                     &cs->uca->level[lvl],
                                                     dst, image_end, nweights,
                                                     src, srclen, flags);
    }
    else
    {
      dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
                                               &cs->uca->level[lvl],
                                               dst, image_end, nweights,
                                               src, srclen, flags);
    }
  }
  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < image_end)
  {
    memset(dst, 0, image_end - dst);
    dst= image_end;
  }
  return dst - image;
 }
 /*
  One-level, PAD SPACE
  Collation handler table. The members wrapped in MY_FUNCTION_NAME() are
  the functions generated by this file; the other members are defined
  outside of this file, and MY_LIKE_RANGE is supplied by the including file.
 */
 MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
 {
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll),         /* first level, no padding         */
  MY_FUNCTION_NAME(strnncollsp),       /* first level, space padded       */
  MY_FUNCTION_NAME(strnxfrm),          /* first level, pads with SPACE    */
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),         /* skips trailing spaces           */
  my_propagate_complex
 };
 /*
  One-level, NO PAD
  Collation handler table. Differs from the one-level PAD SPACE handler
  in the strnncollsp, strnxfrm and hash_sort functions, which are
  replaced by their "nopad" variants.
  For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
  For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic
 */
 MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
 {
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll),         /* first level, no padding         */
  MY_FUNCTION_NAME(strnncollsp_nopad), /* first level, no padding         */
  MY_FUNCTION_NAME(strnxfrm_nopad),    /* first level, pads with zeros    */
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE,    /* my_like_range_mb or my_like_range_generic */
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort_nopad),   /* hashes trailing spaces too      */
  my_propagate_complex
 };
 /*
  Multi-level, PAD SPACE
  Collation handler table. Comparison and strnxfrm use the multi-level
  functions generated by this file. hash_sort is the same function as in
  the one-level handler: it uses the first weight level only.
 */
 MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
 {
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll_multilevel),   /* all levels, no padding    */
  MY_FUNCTION_NAME(strnncollsp_multilevel), /* all levels, space padded  */
  MY_FUNCTION_NAME(strnxfrm_multilevel),
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),         /* first level only               */
  my_propagate_complex
 };
 /*
  Multi-level, NO PAD
  Collation handler table. Differs from the multi-level PAD SPACE handler
  only in the strnncollsp function. strnxfrm_multilevel() is shared by
  both: it tests MY_CS_NOPAD in cs->state itself.
  NOTE(review): hash_sort here is the variant that skips trailing spaces,
  not hash_sort_nopad() - confirm that this is intentional before
  changing it, as hash values may be stored (see the comment in
  hash_sort() about partitioned tables).
 */
 MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
 {
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll_multilevel),         /* all levels, no padding */
  MY_FUNCTION_NAME(strnncollsp_nopad_multilevel), /* all levels, no padding */
  MY_FUNCTION_NAME(strnxfrm_multilevel),
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),         /* first level only, skips end spaces */
  my_propagate_complex
 };
 /*
  Remove the template parameters, so that the including file
  has to define them again before including this file once more.
 */
 #undef MY_FUNCTION_NAME
 #undef MY_MB_WC
 #undef MY_LIKE_RANGE
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@ -1184,35 +1184,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
  but the JSON functions needed my_utf16_uni()
  so the #ifdef was moved lower.
 */
-
+#include "ctype-utf16.h"
 /*
  D800..DB7F - Non-provate surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
 */
 #define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
 #define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
 #define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
 #define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF
 #define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
 #define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
 /* Test if a byte is a leading byte of a high or low surrogate head: */
 #define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
 /* Test if a Unicode code point is a high or low surrogate head */
 #define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)
 #define MY_UTF16_WC2(a, b)         ((a << 8) + b)
 /*
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
 */
 #define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
                                  ((c & 3) << 8) + d + 0x10000)
 #define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b0))
 #define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
@ -1261,32 +1233,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
 my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t *pwc, const uchar *s, const uchar *e)
 {
-  if (s + 2 > e)
+  return my_mb_wc_utf16_quick(pwc, s, e);
    return MY_CS_TOOSMALL2;
  /*
    High bytes: 0xD[89AB] = B'110110??'
    Low bytes:  0xD[CDEF] = B'110111??'
    Surrogate mask:  0xFC = B'11111100'
  */
  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
  {
    if (s + 4 > e)
      return MY_CS_TOOSMALL4;
    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrigate pair */
      return MY_CS_ILSEQ;
    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
    return 4;
  }
  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
    return MY_CS_ILSEQ;
  *pwc= MY_UTF16_WC2(s[0], s[1]);
  return 2;
 }
@ -2109,6 +2056,8 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
 #ifdef HAVE_CHARSET_utf32
 #include "ctype-utf32.h"
 /*
  Check is b0 and b1 start a valid UTF32 four-byte sequence.
  Don't accept characters greater than U+10FFFF.
@ -2117,8 +2066,6 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
 #define IS_MB4_CHAR(b0,b1,b2,b3)   (IS_UTF32_MBHEAD4(b0,b1))
 #define MY_UTF32_WC4(b0,b1,b2,b3)  ((((my_wc_t)b0) << 24) + (b1 << 16) + \
                                                (b2 << 8) + (b3))
 static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
                                             uchar b2, uchar b3)
@ -2161,10 +2108,7 @@ static int
 my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t *pwc, const uchar *s, const uchar *e)
 {
-  if (s + 4 > e)
+  return my_mb_wc_utf32_quick(pwc, s, e);
    return MY_CS_TOOSMALL4;
  *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
  return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
 }
@ -2928,6 +2872,8 @@ struct charset_info_st my_charset_utf32_nopad_bin=
 #ifdef HAVE_CHARSET_ucs2
 #include "ctype-ucs2.h"
 static const uchar ctype_ucs2[] = {
    0,
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@ -3037,11 +2983,7 @@ my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
 static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
 		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  if (s+2 > e) /* Need 2 characters */
+  return my_mb_wc_ucs2_quick(pwc, s, e);
    return MY_CS_TOOSMALL2;
  *pwc= ((uchar)s[0]) * 256  + ((uchar)s[1]);
  return 2;
 }
 static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
--- a/strings/ctype-ucs2.h
+++ b/strings/ctype-ucs2.h
@ -0,0 +1,32 @@
 /*
  Copyright (c) 2018 MariaDB Corporation
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef _CTYPE_UCS2_H
 #define _CTYPE_UCS2_H
 /*
  Decode one UCS2 character (two bytes, most significant byte first).

  SYNOPSIS:
    my_mb_wc_ucs2_quick()
    pwc    Where to store the decoded code point
    s      Beginning of the input
    e      End of the input (points after the last byte)

  RETURN
    2                - a character has been decoded into *pwc
    MY_CS_TOOSMALL2  - fewer than two bytes are available; *pwc is not set
 */
 static inline int
 my_mb_wc_ucs2_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
 {
  /*
    Compare the remaining length rather than "s+2 > e": forming a pointer
    more than one element past the end of the buffer is undefined behavior.
  */
  if (e - s < 2) /* Need 2 bytes */
    return MY_CS_TOOSMALL2;
  *pwc= ((uchar)s[0]) * 256  + ((uchar)s[1]);
  return 2;
 }
 #endif /* _CTYPE_UCS2_H */
--- a/strings/ctype-utf16.h
+++ b/strings/ctype-utf16.h
@ -0,0 +1,80 @@
 /*
  Copyright (c) 2018 MariaDB Corporation
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef _CTYPE_UTF16_H
 #define _CTYPE_UTF16_H
 /*
  D800..DB7F - Non-private surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
 */
 #define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
 #define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
 #define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
 #define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF
 /* Test if a byte is the leading byte of a high / of a low surrogate: */
 #define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
 #define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
 /* Test if a byte is a leading byte of a high or low surrogate head: */
 #define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
 /* Test if a Unicode code point is a high or low surrogate head */
 #define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)
 /*
  Make a code point from the two bytes of a non-surrogate character.
  Arguments are parenthesized, so expressions can be passed safely.
 */
 #define MY_UTF16_WC2(a, b)         (((a) << 8) + (b))
 /*
  Make a code point from the four bytes of a surrogate pair:
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
  Arguments are parenthesized, so expressions can be passed safely.
 */
 #define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                   (((c) & 3) << 8) + (d) + 0x10000)
 /*
  Decode one UTF-16 character (most significant byte first).

  SYNOPSIS:
    my_mb_wc_utf16_quick()
    pwc    Where to store the decoded code point
    s      Beginning of the input
    e      End of the input (points after the last byte)

  RETURN
    2                - a two-byte character has been decoded into *pwc
    4                - a surrogate pair has been decoded into *pwc
    MY_CS_TOOSMALL2  - fewer than two bytes are available
    MY_CS_TOOSMALL4  - a high surrogate is followed by fewer than two bytes
    MY_CS_ILSEQ      - a high surrogate is not followed by a low surrogate,
                       or a low surrogate is found without a high one
    *pwc is not set unless 2 or 4 is returned.
 */
 static inline int
 my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
 {
  /*
    Lengths are checked as "e - s < N" rather than "s + N > e": forming
    a pointer more than one element past the end of the buffer is
    undefined behavior.
  */
  if (e - s < 2)
    return MY_CS_TOOSMALL2;
  /*
    High bytes: 0xD[89AB] = B'110110??'
    Low bytes:  0xD[CDEF] = B'110111??'
    Surrogate mask:  0xFC = B'11111100'
  */
  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
  {
    if (e - s < 4)
      return MY_CS_TOOSMALL4;
    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrogate pair */
      return MY_CS_ILSEQ;
    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
    return 4;
  }
  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
    return MY_CS_ILSEQ;
  *pwc= MY_UTF16_WC2(s[0], s[1]);
  return 2;
 }
 #endif /* _CTYPE_UTF16_H */
--- a/strings/ctype-utf32.h
+++ b/strings/ctype-utf32.h
@ -0,0 +1,33 @@
 /*
  Copyright (c) 2018 MariaDB Corporation
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef _CTYPE_UTF32_H
 #define _CTYPE_UTF32_H
 /*
  Combine four bytes, most significant first, into one 32-bit value.
  Every parameter is parenthesized and widened to my_wc_t before it is
  shifted, so expression arguments are evaluated as a whole and the
  arithmetic is done in the result type.
 */
 #define MY_UTF32_WC4(b0,b1,b2,b3)  ((((my_wc_t) (b0)) << 24) + \
                                     (((my_wc_t) (b1)) << 16) + \
                                     (((my_wc_t) (b2)) << 8) + \
                                     ((my_wc_t) (b3)))
 /*
  Decode one big-endian UTF-32 character from the bytes [s, e).

  Returns MY_CS_TOOSMALL4 (without writing *pwc) if fewer than 4 bytes
  are available. Otherwise the 32-bit value is stored in *pwc and the
  function returns 4, or MY_CS_ILSEQ if the value is above 0x10FFFF.
 */
 static inline int
 my_mb_wc_utf32_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
 {
  my_wc_t code;

  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  code= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
  *pwc= code;                        /* stored even when out of range */
  if (code > 0x10FFFF)
    return MY_CS_ILSEQ;
  return 4;
 }
 #endif /* _CTYPE_UTF32_H */
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@ -26,78 +26,9 @@
 #define EILSEQ ENOENT
 #endif
 /* Detect special bytes and sequences */
 #define IS_CONTINUATION_BYTE(c)   (((uchar) (c) ^ 0x80) < 0x40)
-/*
+#include "ctype-utf8.h"
  Check MB2 character assuming that b0 is alredy known to be >= 0xC2.
  Use this macro if the caller already checked b0 for:
  - an MB1 character
  - an unused gap between MB1 and MB2HEAD
 */
 #define IS_UTF8MB2_STEP2(b0,b1)     (((uchar) (b0) < 0xE0) && \
                                     IS_CONTINUATION_BYTE((uchar) b1))
 /*
  Check MB3 character assuming that b0 is already known to be
  in the valid MB3HEAD range [0xE0..0xEF].
 */
 #define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
                                    IS_CONTINUATION_BYTE(b2) && \
                                    ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
 /*
  Check MB3 character assuming that b0 is already known to be >= 0xE0,
  but is not checked for the high end 0xF0 yet.
  Use this macro if the caller already checked b0 for:
  - an MB1 character
  - an unused gap between MB1 and MB2HEAD
  - an MB2HEAD
 */
 #define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
                                    IS_UTF8MB3_STEP2(b0,b1,b2))
 /*
  UTF-8 quick four-byte mask:
  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  Encoding allows to encode U+00010000..U+001FFFFF
  The maximum character defined in the Unicode standard is U+0010FFFF.
  Higher characters U+00110000..U+001FFFFF are not used.
  11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
  11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
  Valid codes:
  [F0][90..BF][80..BF][80..BF]
  [F1][80..BF][80..BF][80..BF]
  [F2][80..BF][80..BF][80..BF]
  [F3][80..BF][80..BF][80..BF]
  [F4][80..8F][80..BF][80..BF]
 */
 /*
  Check MB4 character assuming that b0 is already
  known to be in the range [0xF0..0xF4]
 */
 #define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
                                       IS_CONTINUATION_BYTE(b2) && \
                                       IS_CONTINUATION_BYTE(b3) && \
                                       (b0 >= 0xf1 || b1 >= 0x90) && \
                                       (b0 <= 0xf3 || b1 <= 0x8F))
 #define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
                                       IS_UTF8MB4_STEP2(b0,b1,b2,b3))
 /* Convert individual bytes to Unicode code points */
 #define UTF8MB2_CODE(b0,b1)       (((my_wc_t) ((uchar) b0 & 0x1f) << 6)  |\
                                   ((my_wc_t) ((uchar) b1 ^ 0x80)))
 #define UTF8MB3_CODE(b0,b1,b2)    (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 6)  |\
                                   ((my_wc_t) ((uchar) b2 ^ 0x80)))
 #define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
                                   ((my_wc_t) ((uchar) b2 ^ 0x80) << 6)  |\
                                    (my_wc_t) ((uchar) b3 ^ 0x80))
 /* Definitions for strcoll.ic */
 #define IS_MB1_CHAR(x)              ((uchar) (x) < 0x80)
@ -4981,42 +4912,7 @@ static const uchar to_upper_utf8[] = {
 /*
  Diff view of my_utf8_uni(). The "+" line is the new one-line body: it
  forwards pwc, s and e to my_mb_wc_utf8mb3_quick() and returns its result;
  cs is not used. The "-" line and the unmarked statements after the "+"
  line look like the previous open-coded decoder that this hunk removes
  (the hunk header counts 42 old lines against 7 new ones); the per-line
  "-" markers seem to have been lost in this rendering - confirm against
  the original patch.
 */
 static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
                       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  uchar c;
+  return my_mb_wc_utf8mb3_quick(pwc, s, e);
  if (s >= e)
    return MY_CS_TOOSMALL;
  c= s[0];
  if (c < 0x80)
  {
    *pwc = c;
    return 1;
  }
  else if (c < 0xc2)
    return MY_CS_ILSEQ;
  else if (c < 0xe0)
  {
    if (s+2 > e) /* We need 2 characters */
      return MY_CS_TOOSMALL2;
    if (!(IS_CONTINUATION_BYTE(s[1])))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB2_CODE(c, s[1]);
    return 2;
  }
  else if (c < 0xf0)
  {
    if (s+3 > e) /* We need 3 characters */
      return MY_CS_TOOSMALL3;
    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
    return 3;
  }
  return MY_CS_ILSEQ;
 }
@ -7379,52 +7275,7 @@ static int
 /*
  Diff view of my_mb_wc_utf8mb4(). The "+" line is the new one-line body:
  it forwards pwc, s and e to my_mb_wc_utf8mb4_quick() and returns its
  result; cs is not used. The "-" line and the unmarked statements after
  the "+" line look like the previous open-coded decoder that this hunk
  removes (the hunk header counts 52 old lines against 7 new ones); the
  per-line "-" markers seem to have been lost in this rendering - confirm
  against the original patch.
 */
 my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
                 my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  uchar c;
+  return my_mb_wc_utf8mb4_quick(pwc, s, e);
  if (s >= e)
    return MY_CS_TOOSMALL;
  c= s[0];
  if (c < 0x80)
  {
    *pwc= c;
    return 1;
  }
  else if (c < 0xc2)
    return MY_CS_ILSEQ;
  else if (c < 0xe0)
  {
    if (s + 2 > e) /* We need 2 characters */
      return MY_CS_TOOSMALL2;
    if (!(IS_CONTINUATION_BYTE(s[1])))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB2_CODE(c, s[1]);
    return 2;
  }
  else if (c < 0xf0)
  {
    if (s + 3 > e) /* We need 3 characters */
      return MY_CS_TOOSMALL3;
    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
    return 3;
  }
  else if (c < 0xf5)
  {
    if (s + 4 > e) /* We need 4 characters */
      return MY_CS_TOOSMALL4;
    if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
    return 4;
  }
  return MY_CS_ILSEQ;
 }
--- a/strings/ctype-utf8.h
+++ b/strings/ctype-utf8.h
@ -0,0 +1,190 @@
 /*
  Copyright (c) 2018 MariaDB Corporation
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef _CTYPE_UTF8_H
 #define _CTYPE_UTF8_H
 /* Detect special bytes and sequences */
 /*
  True if c is a continuation byte 10xxxxxx (0x80..0xBF).
  This definition is intentionally left token-for-token unchanged:
  ctype-utf8.c appears to keep its own copy of it ahead of including this
  header, and C accepts a macro redefinition only when it is identical.
 */
 #define IS_CONTINUATION_BYTE(c)   (((uchar) (c) ^ 0x80) < 0x40)
 /*
  All macros below parenthesize their parameters and compare bytes as
  uchar, so they give the same answer for "char", "signed char" and
  "uchar" arguments as well as for expression arguments.
 */
 /*
  Check MB2 character assuming that b0 is already known to be >= 0xC2.
  Use this macro if the caller already checked b0 for:
  - an MB1 character
  - an unused gap between MB1 and MB2HEAD
 */
 #define IS_UTF8MB2_STEP2(b0,b1)     (((uchar) (b0) < 0xE0) && \
                                      IS_CONTINUATION_BYTE(b1))
 /*
  Check MB3 character assuming that b0 is already known to be
  in the valid MB3HEAD range [0xE0..0xEF].
  The last condition rejects the overlong forms [E0][80..9F][..].
 */
 #define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
                                     IS_CONTINUATION_BYTE(b2) && \
                                     ((uchar) (b0) >= 0xe1 || \
                                      (uchar) (b1) >= 0xa0))
 /*
  Check MB3 character assuming that b0 is already known to be >= 0xE0,
  but is not checked for the high end 0xF0 yet.
  Use this macro if the caller already checked b0 for:
  - an MB1 character
  - an unused gap between MB1 and MB2HEAD
  - an MB2HEAD
 */
 #define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
                                     IS_UTF8MB3_STEP2(b0,b1,b2))
 /*
  UTF-8 quick four-byte mask:
  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  Encoding allows to encode U+00010000..U+001FFFFF
  The maximum character defined in the Unicode standard is U+0010FFFF.
  Higher characters U+00110000..U+001FFFFF are not used.
  11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
  11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
  Valid codes:
  [F0][90..BF][80..BF][80..BF]
  [F1][80..BF][80..BF][80..BF]
  [F2][80..BF][80..BF][80..BF]
  [F3][80..BF][80..BF][80..BF]
  [F4][80..8F][80..BF][80..BF]
 */
 /*
  Check MB4 character assuming that b0 is already
  known to be in the range [0xF0..0xF4].
  The last two conditions reject [F0][80..8F] (overlong) and
  [F4][90..BF] (above U+0010FFFF).
 */
 #define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
                                        IS_CONTINUATION_BYTE(b2) && \
                                        IS_CONTINUATION_BYTE(b3) && \
                                        ((uchar) (b0) >= 0xf1 || \
                                         (uchar) (b1) >= 0x90) && \
                                        ((uchar) (b0) <= 0xf3 || \
                                         (uchar) (b1) <= 0x8F))
 /*
  Check MB4 character assuming that b0 is already known to be >= 0xF0,
  but is not checked for the high end 0xF5 yet.
 */
 #define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
                                        IS_UTF8MB4_STEP2(b0,b1,b2,b3))
 /*
  Convert individual bytes to Unicode code points.
  The bytes are expected to be validated already: the macros only strip
  the length marker from b0 and the 10?????? marker from the other bytes.
 */
 #define UTF8MB2_CODE(b0,b1)       (((my_wc_t) ((uchar) (b0) & 0x1f) << 6)  |\
                                    ((my_wc_t) ((uchar) (b1) ^ 0x80)))
 #define UTF8MB3_CODE(b0,b1,b2)    (((my_wc_t) ((uchar) (b0) & 0x0f) << 12) |\
                                    ((my_wc_t) ((uchar) (b1) ^ 0x80) << 6)  |\
                                    ((my_wc_t) ((uchar) (b2) ^ 0x80)))
 #define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) (b0) & 0x07) << 18) |\
                                    ((my_wc_t) ((uchar) (b1) ^ 0x80) << 12) |\
                                    ((my_wc_t) ((uchar) (b2) ^ 0x80) << 6)  |\
                                     (my_wc_t) ((uchar) (b3) ^ 0x80))
 /*
  Decode one UTF-8 character of at most 3 bytes from the bytes [s, e).

  On success the code point is stored in *pwc and the number of bytes
  consumed (1, 2 or 3) is returned.
  Error returns (*pwc is not written):
  - MY_CS_TOOSMALL:  no bytes are available
  - MY_CS_TOOSMALL2: a 2-byte head with fewer than 2 bytes available
  - MY_CS_TOOSMALL3: a 3-byte head with fewer than 3 bytes available
  - MY_CS_ILSEQ:     a head byte in 0x80..0xC1 or >= 0xF0, or trailing
                     bytes that do not complete a valid sequence
 */
 static inline int
 my_mb_wc_utf8mb3_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
 {
  uchar head;

  if (s >= e)
    return MY_CS_TOOSMALL;

  head= s[0];

  if (head < 0x80)                   /* Single byte character */
  {
    *pwc= head;
    return 1;
  }

  /* Neither a 2-byte nor a 3-byte head */
  if (head < 0xc2 || head >= 0xf0)
    return MY_CS_ILSEQ;

  if (head < 0xe0)                   /* 2-byte head [C2..DF] */
  {
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    if (!IS_CONTINUATION_BYTE(s[1]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB2_CODE(head, s[1]);
    return 2;
  }

  /* 3-byte head [E0..EF] */
  if (s + 3 > e)
    return MY_CS_TOOSMALL3;
  if (!IS_UTF8MB3_STEP2(head, s[1], s[2]))
    return MY_CS_ILSEQ;
  *pwc= UTF8MB3_CODE(head, s[1], s[2]);
  return 3;
 }
 #ifdef HAVE_CHARSET_utf8mb4
 /*
  Decode one UTF-8 character of at most 4 bytes from the bytes [s, e).

  On success the code point is stored in *pwc and the number of bytes
  consumed (1, 2, 3 or 4) is returned.
  Error returns (*pwc is not written):
  - MY_CS_TOOSMALL:  no bytes are available
  - MY_CS_TOOSMALL2: a 2-byte head with fewer than 2 bytes available
  - MY_CS_TOOSMALL3: a 3-byte head with fewer than 3 bytes available
  - MY_CS_TOOSMALL4: a 4-byte head with fewer than 4 bytes available
  - MY_CS_ILSEQ:     a head byte in 0x80..0xC1 or >= 0xF5, or trailing
                     bytes that do not complete a valid sequence
 */
 static inline int
 my_mb_wc_utf8mb4_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
 {
  uchar head;

  if (s >= e)
    return MY_CS_TOOSMALL;

  head= s[0];

  if (head < 0x80)                   /* Single byte character */
  {
    *pwc= head;
    return 1;
  }

  /* Not a head byte of any valid multi-byte sequence */
  if (head < 0xc2 || head >= 0xf5)
    return MY_CS_ILSEQ;

  if (head < 0xe0)                   /* 2-byte head [C2..DF] */
  {
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    if (!IS_CONTINUATION_BYTE(s[1]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB2_CODE(head, s[1]);
    return 2;
  }

  if (head < 0xf0)                   /* 3-byte head [E0..EF] */
  {
    if (s + 3 > e)
      return MY_CS_TOOSMALL3;
    if (!IS_UTF8MB3_STEP2(head, s[1], s[2]))
      return MY_CS_ILSEQ;
    *pwc= UTF8MB3_CODE(head, s[1], s[2]);
    return 3;
  }

  /* 4-byte head [F0..F4] */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;
  if (!IS_UTF8MB4_STEP2(head, s[1], s[2], s[3]))
    return MY_CS_ILSEQ;
  *pwc= UTF8MB4_CODE(head, s[1], s[2], s[3]);
  return 4;
 }
 #endif /* HAVE_CHARSET_utf8mb4*/
 #endif /* _CTYPE_UTF8_H */