2015-06-26 11:40:28 +02:00
|
|
|
/*
|
|
|
|
Copyright (c) 2015, MariaDB Foundation
|
2020-01-26 17:27:13 +01:00
|
|
|
Copyright (c) 2015, 2020, MariaDB Corporation.
|
2015-06-26 11:40:28 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2019-05-11 21:19:05 +02:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
|
2015-06-26 11:40:28 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
  This file is a template: every function it generates is named through
  MY_FUNCTION_NAME(x), so the including file has to define it first.
*/
#if !defined(MY_FUNCTION_NAME)
#error MY_FUNCTION_NAME is not defined
#endif
|
|
|
|
|
2018-10-19 12:20:31 +02:00
|
|
|
/*
  strnncoll() and strnncollsp() are generated by default.
  The including file can suppress them by specifying
  "#define DEFINE_STRNNCOLL 0" before the inclusion.
*/
#if !defined(DEFINE_STRNNCOLL)
#define DEFINE_STRNNCOLL 1
#endif
|
|
|
|
|
2015-06-26 11:40:28 +02:00
|
|
|
|
|
|
|
/*
  Weight used for the spaces that are automatically appended to the
  shorter string when comparing with the PAD SPACE property.
  It should normally be equal to the weight of a regular space,
  which is what the default below provides.
*/
#if !defined(WEIGHT_PAD_SPACE)
#define WEIGHT_PAD_SPACE  (' ')
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Weight of an illegal byte. Any definition must follow these rules:
  1. The weight must be greater than the weight of any normal character
     in the collation.
  2. Two different bad bytes must have different weights, and these
     weights must compare in the binary order of the bytes.

  A suitable definition depends on mbmaxlen of the character set, as well
  as on how the collation sorts its single-byte and multi-byte character
  blocks.

  The default definition below is suitable for mbmaxlen=2 character sets
  that sort all multi-byte characters after all single-byte characters:
  big5, euckr, gb2312, gbk.

  All mbmaxlen>2 character sets must provide their own definitions.
  All collations that have a more complex order (than just MB1 followed
  by MB2) must also provide their own definitions (see the definitions for
  cp932_japanese_ci and sjis_japanese_ci as examples of a more complex order).
*/
#if !defined(WEIGHT_ILSEQ)
#define WEIGHT_ILSEQ(x)   (0xFF00 + (x))
#endif
|
|
|
|
|
|
|
|
|
2018-10-19 12:20:31 +02:00
|
|
|
#if DEFINE_STRNNCOLL
|
|
|
|
|
2015-06-26 11:40:28 +02:00
|
|
|
/**
|
|
|
|
Scan a valid character, or a bad byte, or an auto-padded space
|
|
|
|
from a string and calculate the weight of the scanned sequence.
|
|
|
|
|
|
|
|
@param [OUT] weight - the weight is returned here
|
|
|
|
@param str - the string
|
|
|
|
@param end - the end of the string
|
|
|
|
@return - the number of bytes scanned
|
|
|
|
|
|
|
|
The including source file must define the following macros:
|
2015-07-06 13:50:56 +02:00
|
|
|
IS_MB1_CHAR(b0) - for character sets that have MB1 characters
|
|
|
|
IS_MB1_MB2HEAD_GAP(b0) - optional, for better performance
|
|
|
|
IS_MB2_CHAR(b0,b1) - for character sets that have MB2 characters
|
|
|
|
IS_MB3_CHAR(b0,b1,b2) - for character sets that have MB3 characters
|
|
|
|
IS_MB4_CHAR(b0,b1,b2,b3) - for character sets with have MB4 characters
|
2015-06-26 11:40:28 +02:00
|
|
|
WEIGHT_PAD_SPACE
|
2015-07-06 13:50:56 +02:00
|
|
|
WEIGHT_MB1(b0) - for character sets that have MB1 characters
|
|
|
|
WEIGHT_MB2(b0,b1) - for character sets that have MB2 characters
|
|
|
|
WEIGHT_MB3(b0,b1,b2) - for character sets that have MB3 characters
|
|
|
|
WEIGHT_MB4(b0,b1,b2,b3) - for character sets that have MB4 characters
|
2015-06-26 11:40:28 +02:00
|
|
|
WEIGHT_ILSEQ(x)
|
|
|
|
*/
|
|
|
|
static inline uint
|
|
|
|
MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
|
|
|
|
{
|
|
|
|
if (str >= end)
|
|
|
|
{
|
|
|
|
*weight= WEIGHT_PAD_SPACE;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-07-06 13:50:56 +02:00
|
|
|
#ifdef IS_MB1_CHAR
|
2015-06-26 11:40:28 +02:00
|
|
|
if (IS_MB1_CHAR(*str))
|
|
|
|
{
|
|
|
|
*weight= WEIGHT_MB1(*str); /* A valid single byte character*/
|
|
|
|
return 1;
|
|
|
|
}
|
2015-07-06 13:50:56 +02:00
|
|
|
#endif
|
2015-06-26 11:40:28 +02:00
|
|
|
|
2015-07-03 15:24:16 +02:00
|
|
|
#ifdef IS_MB1_MBHEAD_UNUSED_GAP
|
|
|
|
/*
|
|
|
|
Quickly filter out unused bytes that are neither MB1 nor MBHEAD.
|
2019-06-28 07:05:12 +02:00
|
|
|
E.g. [0x80..0xC1] in utf8mb(3|4). This allows using simplified conditions
|
2015-07-03 15:24:16 +02:00
|
|
|
in IS_MB2_CHAR(), IS_MB3_CHAR(), etc.
|
|
|
|
*/
|
|
|
|
if (IS_MB1_MBHEAD_UNUSED_GAP(*str))
|
|
|
|
goto bad;
|
|
|
|
#endif
|
|
|
|
|
2015-07-06 13:50:56 +02:00
|
|
|
#ifdef IS_MB2_CHAR
|
2015-06-26 11:40:28 +02:00
|
|
|
if (str + 2 > end) /* The string ended unexpectedly */
|
|
|
|
goto bad; /* Treat as a bad byte */
|
|
|
|
|
|
|
|
if (IS_MB2_CHAR(str[0], str[1]))
|
|
|
|
{
|
|
|
|
*weight= WEIGHT_MB2(str[0], str[1]);
|
|
|
|
return 2; /* A valid two-byte character */
|
|
|
|
}
|
2015-07-06 13:50:56 +02:00
|
|
|
#endif
|
2015-06-26 11:40:28 +02:00
|
|
|
|
2015-07-03 08:33:17 +02:00
|
|
|
#ifdef IS_MB3_CHAR
|
|
|
|
if (str + 3 > end) /* Incomplete three-byte character */
|
|
|
|
goto bad;
|
|
|
|
|
|
|
|
if (IS_MB3_CHAR(str[0], str[1], str[2]))
|
|
|
|
{
|
|
|
|
*weight= WEIGHT_MB3(str[0], str[1], str[2]);
|
|
|
|
return 3; /* A valid three-byte character */
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-07-06 08:47:39 +02:00
|
|
|
#ifdef IS_MB4_CHAR
|
|
|
|
if (str + 4 > end) /* Incomplete four-byte character */
|
|
|
|
goto bad;
|
|
|
|
|
|
|
|
if (IS_MB4_CHAR(str[0], str[1], str[2], str[3]))
|
|
|
|
{
|
|
|
|
*weight= WEIGHT_MB4(str[0], str[1], str[2], str[3]);
|
|
|
|
return 4; /* A valid four-byte character */
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2015-06-26 11:40:28 +02:00
|
|
|
bad:
|
|
|
|
*weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Compare two strings according to the collation,
|
|
|
|
without handling the PAD SPACE property.
|
|
|
|
|
2020-01-26 17:27:13 +01:00
|
|
|
Note, strnncoll() is usually used to compare identifiers.
|
2015-06-26 11:40:28 +02:00
|
|
|
Perhaps we should eventually (in 10.2?) create a new collation
|
2019-06-28 07:05:12 +02:00
|
|
|
my_charset_utf8mb3_general_ci_no_pad and have only one comparison function
|
2015-06-26 11:40:28 +02:00
|
|
|
in MY_COLLATION_HANDLER.
|
|
|
|
|
|
|
|
@param cs - the character set and collation
|
|
|
|
@param a - the left string
|
|
|
|
@param a_length - the length of the left string
|
|
|
|
@param b - the right string
|
|
|
|
@param b_length - the length of the right string
|
|
|
|
@param b_is_prefix - if the caller wants to check if "b" is a prefix of "a"
|
|
|
|
@return - the comparison result
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const uchar *a, size_t a_length,
|
|
|
|
const uchar *b, size_t b_length,
|
|
|
|
my_bool b_is_prefix)
|
|
|
|
{
|
|
|
|
const uchar *a_end= a + a_length;
|
|
|
|
const uchar *b_end= b + b_length;
|
|
|
|
for ( ; ; )
|
|
|
|
{
|
|
|
|
int a_weight, b_weight, res;
|
|
|
|
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
|
|
|
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
|
|
|
/*
|
|
|
|
a_wlen b_wlen Comment
|
|
|
|
------ ------ -------
|
|
|
|
0 0 Strings ended simultaneously, "a" and "b" are equal.
|
|
|
|
0 >0 "a" is a prefix of "b", so "a" is smaller.
|
|
|
|
>0 0 "b" is a prefix of "a", check b_is_prefix.
|
|
|
|
>0 >0 Two weights were scanned, check weight difference.
|
|
|
|
*/
|
|
|
|
if (!a_wlen)
|
|
|
|
return b_wlen ? -b_weight : 0;
|
|
|
|
|
|
|
|
if (!b_wlen)
|
|
|
|
return b_is_prefix ? 0 : a_weight;
|
|
|
|
|
|
|
|
if ((res= (a_weight - b_weight)))
|
|
|
|
return res;
|
|
|
|
/*
|
|
|
|
None of the strings has ended yet.
|
|
|
|
*/
|
|
|
|
DBUG_ASSERT(a < a_end);
|
|
|
|
DBUG_ASSERT(b < b_end);
|
|
|
|
a+= a_wlen;
|
|
|
|
b+= b_wlen;
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-09-06 10:50:02 +02:00
|
|
|
#ifdef DEFINE_STRNNCOLLSP_NOPAD
|
|
|
|
|
2015-06-26 11:40:28 +02:00
|
|
|
/**
|
2016-09-06 10:50:02 +02:00
|
|
|
Compare two strings according to the collation, with NO PAD handling.
|
2015-06-26 11:40:28 +02:00
|
|
|
|
|
|
|
@param cs - the character set and collation
|
|
|
|
@param a - the left string
|
|
|
|
@param a_length - the length of the left string
|
|
|
|
@param b - the right string
|
|
|
|
@param b_length - the length of the right string
|
|
|
|
@return - the comparison result
|
|
|
|
*/
|
2016-09-06 10:50:02 +02:00
|
|
|
static int
|
|
|
|
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const uchar *a, size_t a_length,
|
|
|
|
const uchar *b, size_t b_length)
|
|
|
|
{
|
|
|
|
return MY_FUNCTION_NAME(strnncoll)(cs, a, a_length, b, b_length, FALSE);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/**
|
|
|
|
Compare two strings according to the collation, with PAD SPACE handling.
|
2015-06-26 11:40:28 +02:00
|
|
|
|
2016-09-06 10:50:02 +02:00
|
|
|
@param cs - the character set and collation
|
|
|
|
@param a - the left string
|
|
|
|
@param a_length - the length of the left string
|
|
|
|
@param b - the right string
|
|
|
|
@param b_length - the length of the right string
|
|
|
|
@return - the comparison result
|
|
|
|
*/
|
2015-06-26 11:40:28 +02:00
|
|
|
static int
|
|
|
|
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const uchar *a, size_t a_length,
|
2016-03-31 09:04:48 +02:00
|
|
|
const uchar *b, size_t b_length)
|
2015-06-26 11:40:28 +02:00
|
|
|
{
|
|
|
|
const uchar *a_end= a + a_length;
|
|
|
|
const uchar *b_end= b + b_length;
|
|
|
|
for ( ; ; )
|
|
|
|
{
|
|
|
|
int a_weight, b_weight, res;
|
|
|
|
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
|
|
|
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
|
|
|
if ((res= (a_weight - b_weight)))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Got two different weights. Each weight can be generated by either of:
|
|
|
|
- a real character
|
|
|
|
- a bad byte sequence or an incomplete byte sequence
|
|
|
|
- an auto-generated trailing space (PAD SPACE)
|
|
|
|
It does not matter how exactly each weight was generated.
|
|
|
|
Just return the weight difference.
|
|
|
|
*/
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
if (!a_wlen && !b_wlen)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Got two auto-generated trailing spaces, i.e.
|
|
|
|
both strings have now ended, so they are equal.
|
|
|
|
*/
|
|
|
|
DBUG_ASSERT(a == a_end);
|
|
|
|
DBUG_ASSERT(b == b_end);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
At least one of the strings has not ended yet, continue comparison.
|
|
|
|
*/
|
|
|
|
DBUG_ASSERT(a < a_end || b < b_end);
|
|
|
|
a+= a_wlen;
|
|
|
|
b+= b_wlen;
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return 0;
|
|
|
|
}
|
2016-09-06 10:50:02 +02:00
|
|
|
#endif
|
2015-06-26 11:40:28 +02:00
|
|
|
|
2018-10-19 12:20:31 +02:00
|
|
|
#endif /* DEFINE_STRNNCOLL */
|
|
|
|
|
2016-03-23 11:26:43 +01:00
|
|
|
|
|
|
|
#ifdef DEFINE_STRNXFRM
|
|
|
|
/*
  Weight of a two-byte character as stored by strnxfrm().
  Unless the including file overrides it, it is the same weight
  that is used for comparison, WEIGHT_MB2().
*/
#if !defined(WEIGHT_MB2_FRM)
#define WEIGHT_MB2_FRM(x,y)  WEIGHT_MB2(x,y)
#endif
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
|
|
|
uchar *dst, size_t dstlen, uint nweights,
|
|
|
|
const uchar *src, size_t srclen, uint flags)
|
|
|
|
{
|
|
|
|
uchar *d0= dst;
|
|
|
|
uchar *de= dst + dstlen;
|
|
|
|
const uchar *se= src + srclen;
|
|
|
|
const uchar *sort_order= cs->sort_order;
|
|
|
|
|
|
|
|
for (; dst < de && src < se && nweights; nweights--)
|
|
|
|
{
|
2020-01-26 17:27:13 +01:00
|
|
|
if (my_ci_charlen(cs, src, se) > 1)
|
2016-03-23 11:26:43 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
Note, it is safe not to check (src < se)
|
2020-01-26 17:27:13 +01:00
|
|
|
in the code below, because my_ci_charlen() would
|
2016-03-23 11:26:43 +01:00
|
|
|
not return 2 if src was too short
|
|
|
|
*/
|
|
|
|
uint16 e= WEIGHT_MB2_FRM(src[0], src[1]);
|
|
|
|
*dst++= (uchar) (e >> 8);
|
|
|
|
if (dst < de)
|
|
|
|
*dst++= (uchar) (e & 0xFF);
|
|
|
|
src+= 2;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
*dst++= sort_order ? sort_order[*src++] : *src++;
|
|
|
|
}
|
2016-09-06 10:50:02 +02:00
|
|
|
#ifdef DEFINE_STRNNCOLLSP_NOPAD
|
|
|
|
return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, de,
|
|
|
|
nweights, flags, 0);
|
|
|
|
#else
|
2016-03-23 11:26:43 +01:00
|
|
|
return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
|
2016-09-06 10:50:02 +02:00
|
|
|
#endif
|
2016-03-23 11:26:43 +01:00
|
|
|
}
|
|
|
|
#endif /* DEFINE_STRNXFRM */
|
|
|
|
|
|
|
|
|
2018-10-19 12:20:31 +02:00
|
|
|
#if defined(DEFINE_STRNXFRM_UNICODE) || defined(DEFINE_STRNXFRM_UNICODE_NOPAD)
|
|
|
|
|
|
|
|
/*
  Store sorting weights using 2 bytes per character.

  This function is shared between
  - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin
    which support BMP only (U+0000..U+FFFF).
  - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
    which map all supplementary characters to weight 0xFFFD.
*/

/* Everything below must be supplied by the including file */
#if !defined(MY_MB_WC)
#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE
#endif

#if !defined(OPTIMIZE_ASCII)
#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE
#endif

#if !defined(UNICASE_MAXCHAR)
#error UNICASE_MAXCHAR must be defined for DEFINE_STRNXFRM_UNICODE
#endif

#if !defined(UNICASE_PAGE0)
#error UNICASE_PAGE0 must be defined for DEFINE_STRNXFRM_UNICODE
#endif

#if !defined(UNICASE_PAGES)
#error UNICASE_PAGES must be defined for DEFINE_STRNXFRM_UNICODE
#endif
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
2020-01-29 15:41:04 +01:00
|
|
|
MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs __attribute__((unused)),
|
2018-10-19 12:20:31 +02:00
|
|
|
uchar *dst, uchar *de,
|
|
|
|
uint *nweights,
|
|
|
|
const uchar *src, const uchar *se)
|
|
|
|
{
|
|
|
|
my_wc_t UNINIT_VAR(wc);
|
|
|
|
uchar *dst0= dst;
|
|
|
|
|
|
|
|
DBUG_ASSERT(src || !se);
|
|
|
|
DBUG_ASSERT((cs->state & MY_CS_LOWER_SORT) == 0);
|
|
|
|
DBUG_ASSERT(0x7F <= UNICASE_MAXCHAR);
|
|
|
|
|
|
|
|
for (; dst < de && *nweights; (*nweights)--)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
#if OPTIMIZE_ASCII
|
|
|
|
if (src >= se)
|
|
|
|
break;
|
|
|
|
if (src[0] <= 0x7F)
|
|
|
|
{
|
|
|
|
wc= UNICASE_PAGE0[*src++].sort;
|
|
|
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
|
|
|
|
break;
|
|
|
|
src+= res;
|
|
|
|
if (wc <= UNICASE_MAXCHAR)
|
|
|
|
{
|
|
|
|
MY_UNICASE_CHARACTER *page;
|
|
|
|
if ((page= UNICASE_PAGES[wc >> 8]))
|
|
|
|
wc= page[wc & 0xFF].sort;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
wc= MY_CS_REPLACEMENT_CHARACTER;
|
|
|
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
|
|
|
}
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
|
|
|
uchar *dst, size_t dstlen, uint nweights,
|
|
|
|
const uchar *src, size_t srclen, uint flags)
|
|
|
|
{
|
|
|
|
uchar *dst0= dst;
|
|
|
|
uchar *de= dst + dstlen;
|
|
|
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
|
|
|
src, src + srclen);
|
|
|
|
DBUG_ASSERT(dst <= de); /* Safety */
|
|
|
|
|
|
|
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
|
|
dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
|
|
|
|
|
|
|
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
|
|
|
|
|
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
|
|
dst+= my_strxfrm_pad_unicode(dst, de);
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_STRNXFRM_UNICODE_NOPAD
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
|
|
|
|
uchar *dst, size_t dstlen,
|
|
|
|
uint nweights,
|
|
|
|
const uchar *src, size_t srclen, uint flags)
|
|
|
|
{
|
|
|
|
uchar *dst0= dst;
|
|
|
|
uchar *de= dst + dstlen;
|
|
|
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
|
|
|
src, src + srclen);
|
|
|
|
DBUG_ASSERT(dst <= de); /* Safety */
|
|
|
|
|
|
|
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
|
|
{
|
|
|
|
size_t len= de - dst;
|
|
|
|
set_if_smaller(len, nweights * 2);
|
|
|
|
memset(dst, 0x00, len);
|
|
|
|
dst+= len;
|
|
|
|
}
|
|
|
|
|
|
|
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
|
|
|
|
|
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
|
|
{
|
|
|
|
memset(dst, 0x00, de - dst);
|
|
|
|
dst= de;
|
|
|
|
}
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* DEFINE_STRNXFRM_UNICODE || DEFINE_STRNXFRM_UNICODE_NOPAD */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_STRNXFRM_UNICODE_BIN2
|
|
|
|
|
|
|
|
/*
  Store sorting weights using 2 bytes per character.

  These functions are shared between
  - utf8mb3_general_ci, utf8mb3_bin, ucs2_general_ci, ucs2_bin
    which support BMP only (U+0000..U+FFFF).
  - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
    which map all supplementary characters to weight 0xFFFD.
*/

/* Everything below must be supplied by the including file */
#if !defined(MY_MB_WC)
#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
#endif

#if !defined(OPTIMIZE_ASCII)
#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
#endif
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
2020-01-29 15:41:04 +01:00
|
|
|
MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs __attribute__((unused)),
|
2018-10-19 12:20:31 +02:00
|
|
|
uchar *dst, uchar *de,
|
|
|
|
uint *nweights,
|
|
|
|
const uchar *src,
|
|
|
|
const uchar *se)
|
|
|
|
{
|
|
|
|
my_wc_t UNINIT_VAR(wc);
|
|
|
|
uchar *dst0= dst;
|
|
|
|
|
|
|
|
DBUG_ASSERT(src || !se);
|
|
|
|
|
|
|
|
for (; dst < de && *nweights; (*nweights)--)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
#if OPTIMIZE_ASCII
|
|
|
|
if (src >= se)
|
|
|
|
break;
|
|
|
|
if (src[0] <= 0x7F)
|
|
|
|
{
|
|
|
|
wc= *src++;
|
|
|
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
|
|
|
|
break;
|
|
|
|
src+= res;
|
|
|
|
if (wc > 0xFFFF)
|
|
|
|
wc= MY_CS_REPLACEMENT_CHARACTER;
|
|
|
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
|
|
|
}
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
|
|
|
uchar *dst, size_t dstlen, uint nweights,
|
|
|
|
const uchar *src, size_t srclen, uint flags)
|
|
|
|
{
|
|
|
|
uchar *dst0= dst;
|
|
|
|
uchar *de= dst + dstlen;
|
|
|
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
|
|
|
src, src + srclen);
|
|
|
|
DBUG_ASSERT(dst <= de); /* Safety */
|
|
|
|
|
|
|
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
|
|
dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
|
|
|
|
|
|
|
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
|
|
|
|
|
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
|
|
dst+= my_strxfrm_pad_unicode(dst, de);
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
|
|
|
|
uchar *dst, size_t dstlen, uint nweights,
|
|
|
|
const uchar *src, size_t srclen, uint flags)
|
|
|
|
{
|
|
|
|
uchar *dst0= dst;
|
|
|
|
uchar *de= dst + dstlen;
|
|
|
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
|
|
|
src, src + srclen);
|
|
|
|
DBUG_ASSERT(dst <= de); /* Safety */
|
|
|
|
|
|
|
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
|
|
{
|
|
|
|
size_t len= de - dst;
|
|
|
|
set_if_smaller(len, nweights * 2);
|
|
|
|
memset(dst, 0x00, len);
|
|
|
|
dst+= len;
|
|
|
|
}
|
|
|
|
|
|
|
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
|
|
|
|
|
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
|
|
{
|
|
|
|
memset(dst, 0x00, de - dst);
|
|
|
|
dst= de;
|
|
|
|
}
|
|
|
|
return dst - dst0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* DEFINE_STRNXFRM_UNICODE_BIN2 */
|
|
|
|
|
|
|
|
|
2015-06-26 11:40:28 +02:00
|
|
|
/*
  We usually include this file at least two times from the same source file,
  for the _ci and the _bin collations. Prepare for the second inclusion
  by forgetting every per-inclusion setting.
*/

/* The name generator */
#undef MY_FUNCTION_NAME

/* Switches that select which functions are generated */
#undef DEFINE_STRNNCOLL
#undef DEFINE_STRNNCOLLSP_NOPAD
#undef DEFINE_STRNXFRM
#undef DEFINE_STRNXFRM_UNICODE
#undef DEFINE_STRNXFRM_UNICODE_NOPAD
#undef DEFINE_STRNXFRM_UNICODE_BIN2

/* Weight macros */
#undef WEIGHT_ILSEQ
#undef WEIGHT_PAD_SPACE
#undef WEIGHT_MB1
#undef WEIGHT_MB2
#undef WEIGHT_MB2_FRM
#undef WEIGHT_MB3
#undef WEIGHT_MB4

/* Settings of the Unicode strnxfrm() variants */
#undef MY_MB_WC
#undef OPTIMIZE_ASCII
#undef UNICASE_MAXCHAR
#undef UNICASE_PAGE0
#undef UNICASE_PAGES
|