mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 01:04:19 +01:00
MDEV-17502 MDEV-17474 Change Unicode xxx_general_ci and xxx_bin collation implementation to "inline" style
This commit is contained in:
parent
1bb9041176
commit
a8efe7ab1f
5 changed files with 430 additions and 156 deletions
|
@ -871,14 +871,6 @@ size_t my_strnxfrm_mb_nopad(CHARSET_INFO *,
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
const uchar *src, size_t srclen, uint flags);
|
const uchar *src, size_t srclen, uint flags);
|
||||||
|
|
||||||
size_t my_strnxfrm_unicode(CHARSET_INFO *,
|
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
|
||||||
const uchar *src, size_t srclen, uint flags);
|
|
||||||
|
|
||||||
size_t my_strnxfrm_unicode_nopad(CHARSET_INFO *,
|
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
|
||||||
const uchar *src, size_t srclen, uint flags);
|
|
||||||
|
|
||||||
size_t my_strnxfrmlen_unicode(CHARSET_INFO *, size_t);
|
size_t my_strnxfrmlen_unicode(CHARSET_INFO *, size_t);
|
||||||
|
|
||||||
size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *,
|
size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *,
|
||||||
|
|
|
@ -23,6 +23,8 @@
|
||||||
#include <my_sys.h>
|
#include <my_sys.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#include "ctype-unidata.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
|
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
|
||||||
#define HAVE_CHARSET_mb2
|
#define HAVE_CHARSET_mb2
|
||||||
|
@ -1192,10 +1194,17 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
|
static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
|
||||||
{
|
{
|
||||||
my_wc_t wc= MY_UTF16_WC2(b0, b1);
|
my_wc_t wc= MY_UTF16_WC2(b0, b1);
|
||||||
MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
|
MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
|
||||||
return (int) (page ? page[wc & 0xFF].sort : wc);
|
return (int) (page ? page[wc & 0xFF].sort : wc);
|
||||||
}
|
}
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf16_quick(pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
||||||
|
@ -1493,7 +1502,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf16_general_ci,
|
my_strnncoll_utf16_general_ci,
|
||||||
my_strnncollsp_utf16_general_ci,
|
my_strnncollsp_utf16_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf16_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf16_ci,
|
my_wildcmp_utf16_ci,
|
||||||
|
@ -1525,7 +1534,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf16_general_ci,
|
my_strnncoll_utf16_general_ci,
|
||||||
my_strnncollsp_utf16_general_nopad_ci,
|
my_strnncollsp_utf16_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf16_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf16_ci,
|
my_wildcmp_utf16_ci,
|
||||||
|
@ -1722,6 +1731,13 @@ struct charset_info_st my_charset_utf16_nopad_bin=
|
||||||
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
|
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) (cs->cset->mb_wc(cs, pwc, s, e))
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
||||||
|
@ -1826,7 +1842,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf16le_general_ci,
|
my_strnncoll_utf16le_general_ci,
|
||||||
my_strnncollsp_utf16le_general_ci,
|
my_strnncollsp_utf16le_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf16le_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf16_ci,
|
my_wildcmp_utf16_ci,
|
||||||
|
@ -1858,7 +1874,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf16le_general_ci,
|
my_strnncoll_utf16le_general_ci,
|
||||||
my_strnncollsp_utf16le_general_nopad_ci,
|
my_strnncollsp_utf16le_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf16le_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf16_ci,
|
my_wildcmp_utf16_ci,
|
||||||
|
@ -2073,12 +2089,19 @@ static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
|
||||||
my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
|
my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
|
||||||
if (wc <= 0xFFFF)
|
if (wc <= 0xFFFF)
|
||||||
{
|
{
|
||||||
MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
|
MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
|
||||||
return (int) (page ? page[wc & 0xFF].sort : wc);
|
return (int) (page ? page[wc & 0xFF].sort : wc);
|
||||||
}
|
}
|
||||||
return MY_CS_REPLACEMENT_CHARACTER;
|
return MY_CS_REPLACEMENT_CHARACTER;
|
||||||
}
|
}
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf32_quick(pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
|
#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
@ -2642,7 +2665,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf32_general_ci,
|
my_strnncoll_utf32_general_ci,
|
||||||
my_strnncollsp_utf32_general_ci,
|
my_strnncollsp_utf32_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf32_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf32_ci,
|
my_wildcmp_utf32_ci,
|
||||||
|
@ -2674,7 +2697,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf32_general_ci,
|
my_strnncoll_utf32_general_ci,
|
||||||
my_strnncollsp_utf32_general_nopad_ci,
|
my_strnncollsp_utf32_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf32_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_utf32_ci,
|
my_wildcmp_utf32_ci,
|
||||||
|
@ -2941,20 +2964,30 @@ static const uchar to_upper_ucs2[] = {
|
||||||
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
|
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
|
||||||
{
|
{
|
||||||
my_wc_t wc= UCS2_CODE(b0, b1);
|
my_wc_t wc= UCS2_CODE(b0, b1);
|
||||||
MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
|
MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
|
||||||
return (int) (page ? page[wc & 0xFF].sort : wc);
|
return (int) (page ? page[wc & 0xFF].sort : wc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
@ -3222,7 +3255,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_ucs2_general_ci,
|
my_strnncoll_ucs2_general_ci,
|
||||||
my_strnncollsp_ucs2_general_ci,
|
my_strnncollsp_ucs2_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_ucs2_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_ucs2_ci,
|
my_wildcmp_ucs2_ci,
|
||||||
|
@ -3238,7 +3271,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_ucs2_bin,
|
my_strnncoll_ucs2_bin,
|
||||||
my_strnncollsp_ucs2_bin,
|
my_strnncollsp_ucs2_bin,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_ucs2_bin,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_ucs2_bin,
|
my_wildcmp_ucs2_bin,
|
||||||
|
@ -3254,7 +3287,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_ucs2_general_ci,
|
my_strnncoll_ucs2_general_ci,
|
||||||
my_strnncollsp_ucs2_general_nopad_ci,
|
my_strnncollsp_ucs2_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_ucs2_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_ucs2_ci,
|
my_wildcmp_ucs2_ci,
|
||||||
|
@ -3270,7 +3303,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_ucs2_bin,
|
my_strnncoll_ucs2_bin,
|
||||||
my_strnncollsp_ucs2_nopad_bin,
|
my_strnncollsp_ucs2_nopad_bin,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_ucs2_bin,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_generic,
|
my_like_range_generic,
|
||||||
my_wildcmp_ucs2_bin,
|
my_wildcmp_ucs2_bin,
|
||||||
|
|
31
strings/ctype-unidata.h
Normal file
31
strings/ctype-unidata.h
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
#ifndef CTYPE_UNIDATA_H_INCLUDED
|
||||||
|
#define CTYPE_UNIDATA_H_INCLUDED
|
||||||
|
/*
|
||||||
|
Copyright (c) 2018 MariaDB Corporation
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; version 2 of the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define MY_UNICASE_INFO_DEFAULT_MAXCHAR 0xFFFF
|
||||||
|
extern MY_UNICASE_CHARACTER my_unicase_default_page00[256];
|
||||||
|
extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
|
||||||
|
|
||||||
|
size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights);
|
||||||
|
size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend);
|
||||||
|
|
||||||
|
|
||||||
|
#define PUT_WC_BE2_HAVE_1BYTE(dst, de, wc) \
|
||||||
|
do { *dst++= (uchar) (wc >> 8); if (dst < de) *dst++= (uchar) (wc & 0xFF); } while(0)
|
||||||
|
|
||||||
|
#endif /* CTYPE_UNIDATA_H_INCLUDED */
|
|
@ -28,6 +28,7 @@
|
||||||
|
|
||||||
|
|
||||||
#include "ctype-utf8.h"
|
#include "ctype-utf8.h"
|
||||||
|
#include "ctype-unidata.h"
|
||||||
|
|
||||||
|
|
||||||
/* Definitions for strcoll.ic */
|
/* Definitions for strcoll.ic */
|
||||||
|
@ -111,7 +112,7 @@ int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e)
|
||||||
|
|
||||||
#include "my_uctype.h"
|
#include "my_uctype.h"
|
||||||
|
|
||||||
static MY_UNICASE_CHARACTER plane00[]={
|
MY_UNICASE_CHARACTER my_unicase_default_page00[]={
|
||||||
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
|
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
|
||||||
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
|
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
|
||||||
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
|
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
|
||||||
|
@ -244,7 +245,7 @@ static MY_UNICASE_CHARACTER plane00[]={
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Almost similar to plane00, but maps sorting order
|
Almost similar to my_unicase_default_page00, but maps sorting order
|
||||||
for U+00DF to 0x00DF instead of 0x0053.
|
for U+00DF to 0x00DF instead of 0x0053.
|
||||||
*/
|
*/
|
||||||
static MY_UNICASE_CHARACTER plane00_mysql500[]={
|
static MY_UNICASE_CHARACTER plane00_mysql500[]={
|
||||||
|
@ -1690,9 +1691,10 @@ static MY_UNICASE_CHARACTER planeFF[]={
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
|
MY_UNICASE_CHARACTER *my_unicase_default_pages[256]=
|
||||||
{
|
{
|
||||||
plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
|
my_unicase_default_page00,
|
||||||
|
plane01, plane02, plane03, plane04, plane05, NULL, NULL,
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F,
|
NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F,
|
||||||
|
@ -1729,8 +1731,8 @@ static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
|
||||||
|
|
||||||
MY_UNICASE_INFO my_unicase_default=
|
MY_UNICASE_INFO my_unicase_default=
|
||||||
{
|
{
|
||||||
0xFFFF,
|
MY_UNICASE_INFO_DEFAULT_MAXCHAR,
|
||||||
my_unicase_pages_default
|
my_unicase_default_pages
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -4581,7 +4583,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
|
||||||
@return Result length
|
@return Result length
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static size_t
|
size_t
|
||||||
my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
|
my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
|
||||||
{
|
{
|
||||||
uchar *str0;
|
uchar *str0;
|
||||||
|
@ -4610,7 +4612,7 @@ my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
|
||||||
@return Result length
|
@return Result length
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static size_t
|
size_t
|
||||||
my_strxfrm_pad_unicode(uchar *str, uchar *strend)
|
my_strxfrm_pad_unicode(uchar *str, uchar *strend)
|
||||||
{
|
{
|
||||||
uchar *str0= str;
|
uchar *str0= str;
|
||||||
|
@ -4625,95 +4627,6 @@ my_strxfrm_pad_unicode(uchar *str, uchar *strend)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t my_strnxfrm_unicode_internal(CHARSET_INFO *cs,
|
|
||||||
uchar *dst, uchar *de, uint *nweights,
|
|
||||||
const uchar *src, const uchar *se)
|
|
||||||
{
|
|
||||||
my_wc_t UNINIT_VAR(wc);
|
|
||||||
int res;
|
|
||||||
uchar *dst0= dst;
|
|
||||||
MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ?
|
|
||||||
NULL : cs->caseinfo;
|
|
||||||
|
|
||||||
DBUG_ASSERT(src || !se);
|
|
||||||
|
|
||||||
for (; dst < de && *nweights; (*nweights)--)
|
|
||||||
{
|
|
||||||
if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
|
|
||||||
break;
|
|
||||||
src+= res;
|
|
||||||
|
|
||||||
if (uni_plane)
|
|
||||||
my_tosort_unicode(uni_plane, &wc, cs->state);
|
|
||||||
|
|
||||||
*dst++= (uchar) (wc >> 8);
|
|
||||||
if (dst < de)
|
|
||||||
*dst++= (uchar) (wc & 0xFF);
|
|
||||||
}
|
|
||||||
return dst - dst0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
Store sorting weights using 2 bytes per character.
|
|
||||||
|
|
||||||
This function is shared between
|
|
||||||
- utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
|
|
||||||
which support BMP only (U+0000..U+FFFF).
|
|
||||||
- utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
|
|
||||||
which map all supplementary characters to weight 0xFFFD.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
my_strnxfrm_unicode(CHARSET_INFO *cs,
|
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
|
||||||
const uchar *src, size_t srclen, uint flags)
|
|
||||||
{
|
|
||||||
uchar *dst0= dst;
|
|
||||||
uchar *de= dst + dstlen;
|
|
||||||
dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
|
|
||||||
src, src + srclen);
|
|
||||||
DBUG_ASSERT(dst <= de); /* Safety */
|
|
||||||
|
|
||||||
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
||||||
dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
|
|
||||||
|
|
||||||
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
||||||
|
|
||||||
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
||||||
dst+= my_strxfrm_pad_unicode(dst, de);
|
|
||||||
return dst - dst0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
size_t
|
|
||||||
my_strnxfrm_unicode_nopad(CHARSET_INFO *cs,
|
|
||||||
uchar *dst, size_t dstlen, uint nweights,
|
|
||||||
const uchar *src, size_t srclen, uint flags)
|
|
||||||
{
|
|
||||||
uchar *dst0= dst;
|
|
||||||
uchar *de= dst + dstlen;
|
|
||||||
dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
|
|
||||||
src, src + srclen);
|
|
||||||
DBUG_ASSERT(dst <= de); /* Safety */
|
|
||||||
|
|
||||||
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
|
||||||
{
|
|
||||||
size_t len= de - dst;
|
|
||||||
set_if_smaller(len, nweights * 2);
|
|
||||||
memset(dst, 0x00, len);
|
|
||||||
dst+= len;
|
|
||||||
}
|
|
||||||
|
|
||||||
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
|
||||||
|
|
||||||
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
|
||||||
{
|
|
||||||
memset(dst, 0x00, de - dst);
|
|
||||||
dst= de;
|
|
||||||
}
|
|
||||||
return dst - dst0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
For BMP-only collations that use 2 bytes per weight.
|
For BMP-only collations that use 2 bytes per weight.
|
||||||
*/
|
*/
|
||||||
|
@ -5208,7 +5121,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
It represents a single byte character.
|
It represents a single byte character.
|
||||||
Convert it into weight according to collation.
|
Convert it into weight according to collation.
|
||||||
*/
|
*/
|
||||||
s_wc= plane00[(uchar) s[0]].tolower;
|
s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -5250,7 +5163,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
if ((uchar) t[0] < 128)
|
if ((uchar) t[0] < 128)
|
||||||
{
|
{
|
||||||
/* Convert single byte character into weight */
|
/* Convert single byte character into weight */
|
||||||
t_wc= plane00[(uchar) t[0]].tolower;
|
t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -5313,14 +5226,14 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
|
|
||||||
static inline int my_weight_mb1_utf8_general_ci(uchar b)
|
static inline int my_weight_mb1_utf8_general_ci(uchar b)
|
||||||
{
|
{
|
||||||
return (int) plane00[b & 0xFF].sort;
|
return (int) my_unicase_default_page00[b & 0xFF].sort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
|
static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
|
||||||
{
|
{
|
||||||
my_wc_t wc= UTF8MB2_CODE(b0, b1);
|
my_wc_t wc= UTF8MB2_CODE(b0, b1);
|
||||||
MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
|
MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
|
||||||
return (int) (page ? page[wc & 0xFF].sort : wc);
|
return (int) (page ? page[wc & 0xFF].sort : wc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5328,16 +5241,23 @@ static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
|
||||||
static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2)
|
static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2)
|
||||||
{
|
{
|
||||||
my_wc_t wc= UTF8MB3_CODE(b0, b1, b2);
|
my_wc_t wc= UTF8MB3_CODE(b0, b1, b2);
|
||||||
MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
|
MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
|
||||||
return (int) (page ? page[wc & 0xFF].sort : wc);
|
return (int) (page ? page[wc & 0xFF].sort : wc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
|
||||||
#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
|
#define OPTIMIZE_ASCII 1
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
|
||||||
|
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
|
||||||
|
#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
@ -5373,19 +5293,28 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x)
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
|
||||||
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y)
|
#define OPTIMIZE_ASCII 1
|
||||||
#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 plane00_mysql500
|
||||||
|
#define UNICASE_PAGES my_unicase_pages_mysql500
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x)
|
||||||
|
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y)
|
||||||
|
#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
#define WEIGHT_MB1(x) ((int) (uchar) (x))
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
|
||||||
#define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y))
|
#define OPTIMIZE_ASCII 1
|
||||||
#define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB1(x) ((int) (uchar) (x))
|
||||||
|
#define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y))
|
||||||
|
#define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z))
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
@ -5434,7 +5363,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_general_ci,
|
my_strnncoll_utf8_general_ci,
|
||||||
my_strnncollsp_utf8_general_ci,
|
my_strnncollsp_utf8_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf8_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8,
|
my_wildcmp_utf8,
|
||||||
|
@ -5450,7 +5379,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_general_mysql500_ci,
|
my_strnncoll_utf8_general_mysql500_ci,
|
||||||
my_strnncollsp_utf8_general_mysql500_ci,
|
my_strnncollsp_utf8_general_mysql500_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf8_general_mysql500_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8,
|
my_wildcmp_utf8,
|
||||||
|
@ -5466,7 +5395,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_bin,
|
my_strnncoll_utf8_bin,
|
||||||
my_strnncollsp_utf8_bin,
|
my_strnncollsp_utf8_bin,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf8_bin,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb_bin,
|
my_wildcmp_mb_bin,
|
||||||
|
@ -5482,7 +5411,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_general_ci,
|
my_strnncoll_utf8_general_ci,
|
||||||
my_strnncollsp_utf8_general_nopad_ci,
|
my_strnncollsp_utf8_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf8_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8,
|
my_wildcmp_utf8,
|
||||||
|
@ -5498,7 +5427,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_bin,
|
my_strnncoll_utf8_bin,
|
||||||
my_strnncollsp_utf8_nopad_bin,
|
my_strnncollsp_utf8_nopad_bin,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf8_bin,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb_bin,
|
my_wildcmp_mb_bin,
|
||||||
|
@ -5827,7 +5756,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8_cs,
|
my_strnncoll_utf8_cs,
|
||||||
my_strnncollsp_utf8_cs,
|
my_strnncollsp_utf8_cs,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf8_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_simple,
|
my_like_range_simple,
|
||||||
my_wildcmp_mb,
|
my_wildcmp_mb,
|
||||||
|
@ -7112,13 +7041,30 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
|
||||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||||
/* my_well_formed_char_length_filename */
|
/* my_well_formed_char_length_filename */
|
||||||
|
|
||||||
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
|
||||||
|
#define DEFINE_STRNNCOLL 0
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_filename(cs, pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 0
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
|
|
||||||
|
/*
|
||||||
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
|
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
|
||||||
|
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
|
||||||
|
#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
|
||||||
|
*/
|
||||||
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
static MY_COLLATION_HANDLER my_collation_filename_handler =
|
static MY_COLLATION_HANDLER my_collation_filename_handler =
|
||||||
{
|
{
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_simple,
|
my_strnncoll_simple,
|
||||||
my_strnncollsp_simple,
|
my_strnncollsp_simple,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_filename,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8,
|
my_wildcmp_utf8,
|
||||||
|
@ -7607,7 +7553,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
It represents a single byte character.
|
It represents a single byte character.
|
||||||
Convert it into weight according to collation.
|
Convert it into weight according to collation.
|
||||||
*/
|
*/
|
||||||
s_wc= plane00[(uchar) s[0]].tolower;
|
s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -7631,7 +7577,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
if ((uchar) t[0] < 128)
|
if ((uchar) t[0] < 128)
|
||||||
{
|
{
|
||||||
/* Convert single byte character into weight */
|
/* Convert single byte character into weight */
|
||||||
t_wc= plane00[(uchar) t[0]].tolower;
|
t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -7702,6 +7648,13 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
|
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE
|
||||||
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb4_quick(pwc, s, e)
|
||||||
|
#define OPTIMIZE_ASCII 1
|
||||||
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
||||||
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
||||||
|
#define UNICASE_PAGES my_unicase_default_pages
|
||||||
#define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3)
|
#define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3)
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0)
|
#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0)
|
||||||
|
@ -7752,7 +7705,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8mb4_general_ci,
|
my_strnncoll_utf8mb4_general_ci,
|
||||||
my_strnncollsp_utf8mb4_general_ci,
|
my_strnncollsp_utf8mb4_general_ci,
|
||||||
my_strnxfrm_unicode,
|
my_strnxfrm_utf8mb4_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8mb4,
|
my_wildcmp_utf8mb4,
|
||||||
|
@ -7784,7 +7737,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
|
||||||
NULL, /* init */
|
NULL, /* init */
|
||||||
my_strnncoll_utf8mb4_general_ci,
|
my_strnncoll_utf8mb4_general_ci,
|
||||||
my_strnncollsp_utf8mb4_general_nopad_ci,
|
my_strnncollsp_utf8mb4_general_nopad_ci,
|
||||||
my_strnxfrm_unicode_nopad,
|
my_strnxfrm_nopad_utf8mb4_general_ci,
|
||||||
my_strnxfrmlen_unicode,
|
my_strnxfrmlen_unicode,
|
||||||
my_like_range_mb,
|
my_like_range_mb,
|
||||||
my_wildcmp_utf8mb4,
|
my_wildcmp_utf8mb4,
|
||||||
|
|
|
@ -15,11 +15,18 @@
|
||||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef MY_FUNCTION_NAME
|
#ifndef MY_FUNCTION_NAME
|
||||||
#error MY_FUNCTION_NAME is not defined
|
#error MY_FUNCTION_NAME is not defined
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
Define strnncoll() and strnncollsp() by default,
|
||||||
|
unless "#define DEFINE_STRNNCOLL 0" is specified.
|
||||||
|
*/
|
||||||
|
#ifndef DEFINE_STRNNCOLL
|
||||||
|
#define DEFINE_STRNNCOLL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The weight for automatically padded spaces when comparing strings with
|
The weight for automatically padded spaces when comparing strings with
|
||||||
|
@ -54,6 +61,8 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if DEFINE_STRNNCOLL
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Scan a valid character, or a bad byte, or an auto-padded space
|
Scan a valid character, or a bad byte, or an auto-padded space
|
||||||
from a string and calculate the weight of the scanned sequence.
|
from a string and calculate the weight of the scanned sequence.
|
||||||
|
@ -278,6 +287,8 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif /* DEFINE_STRNNCOLL */
|
||||||
|
|
||||||
|
|
||||||
#ifdef DEFINE_STRNXFRM
|
#ifdef DEFINE_STRNXFRM
|
||||||
#ifndef WEIGHT_MB2_FRM
|
#ifndef WEIGHT_MB2_FRM
|
||||||
|
@ -322,11 +333,261 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
||||||
#endif /* DEFINE_STRNXFRM */
|
#endif /* DEFINE_STRNXFRM */
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(DEFINE_STRNXFRM_UNICODE) || defined(DEFINE_STRNXFRM_UNICODE_NOPAD)
|
||||||
|
|
||||||
|
/*
|
||||||
|
Store sorting weights using 2 bytes per character.
|
||||||
|
|
||||||
|
This function is shared between
|
||||||
|
- utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
|
||||||
|
which support BMP only (U+0000..U+FFFF).
|
||||||
|
- utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
|
||||||
|
which map all supplementary characters to weight 0xFFFD.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MY_MB_WC
|
||||||
|
#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef OPTIMIZE_ASCII
|
||||||
|
#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef UNICASE_MAXCHAR
|
||||||
|
#error UNICASE_MAXCHAR must be defined for DEFINE_STRNXFRM_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef UNICASE_PAGE0
|
||||||
|
#error UNICASE_PAGE0 must be defined for DEFINE_STRNXFRM_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef UNICASE_PAGES
|
||||||
|
#error UNICASE_PAGES must be defined for DEFINE_STRNXFRM_UNICODE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, uchar *de,
|
||||||
|
uint *nweights,
|
||||||
|
const uchar *src, const uchar *se)
|
||||||
|
{
|
||||||
|
my_wc_t UNINIT_VAR(wc);
|
||||||
|
uchar *dst0= dst;
|
||||||
|
|
||||||
|
DBUG_ASSERT(src || !se);
|
||||||
|
DBUG_ASSERT((cs->state & MY_CS_LOWER_SORT) == 0);
|
||||||
|
DBUG_ASSERT(0x7F <= UNICASE_MAXCHAR);
|
||||||
|
|
||||||
|
for (; dst < de && *nweights; (*nweights)--)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
#if OPTIMIZE_ASCII
|
||||||
|
if (src >= se)
|
||||||
|
break;
|
||||||
|
if (src[0] <= 0x7F)
|
||||||
|
{
|
||||||
|
wc= UNICASE_PAGE0[*src++].sort;
|
||||||
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
|
||||||
|
break;
|
||||||
|
src+= res;
|
||||||
|
if (wc <= UNICASE_MAXCHAR)
|
||||||
|
{
|
||||||
|
MY_UNICASE_CHARACTER *page;
|
||||||
|
if ((page= UNICASE_PAGES[wc >> 8]))
|
||||||
|
wc= page[wc & 0xFF].sort;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||||
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
||||||
|
}
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
|
const uchar *src, size_t srclen, uint flags)
|
||||||
|
{
|
||||||
|
uchar *dst0= dst;
|
||||||
|
uchar *de= dst + dstlen;
|
||||||
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
||||||
|
src, src + srclen);
|
||||||
|
DBUG_ASSERT(dst <= de); /* Safety */
|
||||||
|
|
||||||
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
||||||
|
dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
|
||||||
|
|
||||||
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
||||||
|
|
||||||
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
||||||
|
dst+= my_strxfrm_pad_unicode(dst, de);
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, size_t dstlen,
|
||||||
|
uint nweights,
|
||||||
|
const uchar *src, size_t srclen, uint flags)
|
||||||
|
{
|
||||||
|
uchar *dst0= dst;
|
||||||
|
uchar *de= dst + dstlen;
|
||||||
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
||||||
|
src, src + srclen);
|
||||||
|
DBUG_ASSERT(dst <= de); /* Safety */
|
||||||
|
|
||||||
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
||||||
|
{
|
||||||
|
size_t len= de - dst;
|
||||||
|
set_if_smaller(len, nweights * 2);
|
||||||
|
memset(dst, 0x00, len);
|
||||||
|
dst+= len;
|
||||||
|
}
|
||||||
|
|
||||||
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
||||||
|
|
||||||
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
||||||
|
{
|
||||||
|
memset(dst, 0x00, de - dst);
|
||||||
|
dst= de;
|
||||||
|
}
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* DEFINE_STRNXFRM_UNICODE || DEFINE_STRNXFRM_UNICODE_NOPAD */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
|
|
||||||
|
/*
|
||||||
|
Store sorting weights using 2 bytes per character.
|
||||||
|
|
||||||
|
These functions are shared between
|
||||||
|
- utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
|
||||||
|
which support BMP only (U+0000..U+FFFF).
|
||||||
|
- utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
|
||||||
|
which map all supplementary characters to weight 0xFFFD.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MY_MB_WC
|
||||||
|
#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef OPTIMIZE_ASCII
|
||||||
|
#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, uchar *de,
|
||||||
|
uint *nweights,
|
||||||
|
const uchar *src,
|
||||||
|
const uchar *se)
|
||||||
|
{
|
||||||
|
my_wc_t UNINIT_VAR(wc);
|
||||||
|
uchar *dst0= dst;
|
||||||
|
|
||||||
|
DBUG_ASSERT(src || !se);
|
||||||
|
|
||||||
|
for (; dst < de && *nweights; (*nweights)--)
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
#if OPTIMIZE_ASCII
|
||||||
|
if (src >= se)
|
||||||
|
break;
|
||||||
|
if (src[0] <= 0x7F)
|
||||||
|
{
|
||||||
|
wc= *src++;
|
||||||
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
|
||||||
|
break;
|
||||||
|
src+= res;
|
||||||
|
if (wc > 0xFFFF)
|
||||||
|
wc= MY_CS_REPLACEMENT_CHARACTER;
|
||||||
|
PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
|
||||||
|
}
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
|
const uchar *src, size_t srclen, uint flags)
|
||||||
|
{
|
||||||
|
uchar *dst0= dst;
|
||||||
|
uchar *de= dst + dstlen;
|
||||||
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
||||||
|
src, src + srclen);
|
||||||
|
DBUG_ASSERT(dst <= de); /* Safety */
|
||||||
|
|
||||||
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
||||||
|
dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
|
||||||
|
|
||||||
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
||||||
|
|
||||||
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
||||||
|
dst+= my_strxfrm_pad_unicode(dst, de);
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
|
||||||
|
uchar *dst, size_t dstlen, uint nweights,
|
||||||
|
const uchar *src, size_t srclen, uint flags)
|
||||||
|
{
|
||||||
|
uchar *dst0= dst;
|
||||||
|
uchar *de= dst + dstlen;
|
||||||
|
dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
|
||||||
|
src, src + srclen);
|
||||||
|
DBUG_ASSERT(dst <= de); /* Safety */
|
||||||
|
|
||||||
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
||||||
|
{
|
||||||
|
size_t len= de - dst;
|
||||||
|
set_if_smaller(len, nweights * 2);
|
||||||
|
memset(dst, 0x00, len);
|
||||||
|
dst+= len;
|
||||||
|
}
|
||||||
|
|
||||||
|
my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
|
||||||
|
|
||||||
|
if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
|
||||||
|
{
|
||||||
|
memset(dst, 0x00, de - dst);
|
||||||
|
dst= de;
|
||||||
|
}
|
||||||
|
return dst - dst0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* DEFINE_STRNXFRM_UNICODE_BIN2 */
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
We usually include this file at least two times from the same source file,
|
We usually include this file at least two times from the same source file,
|
||||||
for the _ci and the _bin collations. Prepare for the second inclusion.
|
for the _ci and the _bin collations. Prepare for the second inclusion.
|
||||||
*/
|
*/
|
||||||
#undef MY_FUNCTION_NAME
|
#undef MY_FUNCTION_NAME
|
||||||
|
#undef MY_MB_WC
|
||||||
|
#undef OPTIMIZE_ASCII
|
||||||
|
#undef UNICASE_MAXCHAR
|
||||||
|
#undef UNICASE_PAGE0
|
||||||
|
#undef UNICASE_PAGES
|
||||||
#undef WEIGHT_ILSEQ
|
#undef WEIGHT_ILSEQ
|
||||||
#undef WEIGHT_MB1
|
#undef WEIGHT_MB1
|
||||||
#undef WEIGHT_MB2
|
#undef WEIGHT_MB2
|
||||||
|
@ -335,4 +596,8 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
|
||||||
#undef WEIGHT_PAD_SPACE
|
#undef WEIGHT_PAD_SPACE
|
||||||
#undef WEIGHT_MB2_FRM
|
#undef WEIGHT_MB2_FRM
|
||||||
#undef DEFINE_STRNXFRM
|
#undef DEFINE_STRNXFRM
|
||||||
|
#undef DEFINE_STRNXFRM_UNICODE
|
||||||
|
#undef DEFINE_STRNXFRM_UNICODE_NOPAD
|
||||||
|
#undef DEFINE_STRNXFRM_UNICODE_BIN2
|
||||||
|
#undef DEFINE_STRNNCOLL
|
||||||
#undef DEFINE_STRNNCOLLSP_NOPAD
|
#undef DEFINE_STRNNCOLLSP_NOPAD
|
||||||
|
|
Loading…
Add table
Reference in a new issue