2018-10-19 12:20:31 +02:00
|
|
|
#ifndef CTYPE_UNIDATA_H_INCLUDED
|
|
|
|
#define CTYPE_UNIDATA_H_INCLUDED
|
|
|
|
/*
  Copyright (c) 2018, 2023 MariaDB Corporation

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; version 2 of the License.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
|
|
|
|
|
|
|
|
|
2023-02-24 16:22:32 +01:00
|
|
|
/*
  Weight tables for the general_ci family of collations.
  These are declarations only; the arrays are defined elsewhere.

  The *_index arrays are indexed by the high part of a character
  (wc >> 8) and yield either a 256-entry page of weights, indexed by
  (wc & 0xFF), or a NULL pointer. See my_general_ci_bmp_char_to_weight()
  and my_general_mysql500_ci_bmp_char_to_weight() for how they are read.

  NOTE(review): the *_page00 arrays are presumably the pages for
  characters 0x00..0xFF - confirm against the definitions.
*/
extern const uint16 weight_general_ci_page00[256];
|
|
|
|
extern const uint16 *weight_general_ci_index[256];
|
|
|
|
/* Same layout as above, for the general_mysql500_ci variant. */
extern const uint16 weight_general_mysql500_ci_page00[256];
|
|
|
|
extern const uint16 *weight_general_mysql500_ci_index[256];
|
2023-02-28 07:49:25 +01:00
|
|
|
|
2023-02-24 16:22:32 +01:00
|
|
|
/*
  256 case folding entries, defined elsewhere. The .tolower and .toupper
  members of an entry are read by my_u300_tolower_7bit() and
  my_u300_toupper_7bit(), which index this table by a single byte.
*/
extern const MY_CASEFOLD_CHARACTER u300_casefold_page00[256];
|
2023-04-18 04:44:03 +02:00
|
|
|
|
|
|
|
/*
  Look up the lower-case mapping of a single-byte character
  in u300_casefold_page00.

  @param ch  The character; used directly as the table index.
             The table has 256 entries, so any uchar value is in range.
  @return    The "tolower" member of the table entry for ch.
*/
static inline my_wc_t my_u300_tolower_7bit(uchar ch)
{
  const MY_CASEFOLD_CHARACTER *entry= &u300_casefold_page00[ch];
  return entry->tolower;
}
|
|
|
|
|
|
|
|
/*
  Look up the upper-case mapping of a single-byte character
  in u300_casefold_page00.

  @param ch  The character; used directly as the table index.
             The table has 256 entries, so any uchar value is in range.
  @return    The "toupper" member of the table entry for ch.
*/
static inline my_wc_t my_u300_toupper_7bit(uchar ch)
{
  const MY_CASEFOLD_CHARACTER *entry= &u300_casefold_page00[ch];
  return entry->toupper;
}
|
|
|
|
|
|
|
|
|
2023-02-24 16:22:32 +01:00
|
|
|
/*
  Return the general_ci weight of a character whose page number
  (wc >> 8) is covered by weight_general_ci_index.

  The caller must guarantee that (wc >> 8) is a valid index;
  this is only checked through DBUG_ASSERT.

  @param wc  The character.
  @return    The weight from the character's page, or wc itself
             if the index has no page (a NULL entry) for it.
*/
static inline my_wc_t my_general_ci_bmp_char_to_weight(my_wc_t wc)
{
  const uint16 *weights;
  DBUG_ASSERT((wc >> 8) < array_elements(weight_general_ci_index));
  weights= weight_general_ci_index[wc >> 8];
  if (!weights)
    return wc;                       /* No page: the character is its own weight */
  return weights[wc & 0xFF];
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Return the general_ci weight of an arbitrary character.

  @param wc  The character.
  @return    MY_CS_REPLACEMENT_CHARACTER if the page number (wc >> 8)
             is outside weight_general_ci_index, otherwise the result
             of my_general_ci_bmp_char_to_weight(wc).
*/
static inline my_wc_t my_general_ci_char_to_weight(my_wc_t wc)
{
  if ((wc >> 8) >= array_elements(weight_general_ci_index))
    return MY_CS_REPLACEMENT_CHARACTER;
  return my_general_ci_bmp_char_to_weight(wc);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Return the general_mysql500_ci weight of a character whose page number
  (wc >> 8) is covered by weight_general_mysql500_ci_index.

  The caller must guarantee that (wc >> 8) is a valid index;
  this is only checked through DBUG_ASSERT.

  @param wc  The character.
  @return    The weight from the character's page, or wc itself
             if the index has no page (a NULL entry) for it.
*/
static inline my_wc_t my_general_mysql500_ci_bmp_char_to_weight(my_wc_t wc)
{
  const uint16 *weights;
  DBUG_ASSERT((wc >> 8) < array_elements(weight_general_mysql500_ci_index));
  weights= weight_general_mysql500_ci_index[wc >> 8];
  if (!weights)
    return wc;                       /* No page: the character is its own weight */
  return weights[wc & 0xFF];
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void my_tosort_unicode_bmp(MY_CASEFOLD_INFO *uni_plane,
|
2023-04-18 07:40:41 +02:00
|
|
|
my_wc_t *wc)
|
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const uint16 *page;
|
2023-04-18 07:40:41 +02:00
|
|
|
DBUG_ASSERT(*wc <= uni_plane->maxchar);
|
2023-02-24 16:22:32 +01:00
|
|
|
if ((page= uni_plane->simple_weight[*wc >> 8]))
|
|
|
|
*wc= page[*wc & 0xFF];
|
2023-04-18 07:40:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-02-24 16:22:32 +01:00
|
|
|
static inline void my_tosort_unicode(MY_CASEFOLD_INFO *uni_plane,
|
2023-04-18 07:40:41 +02:00
|
|
|
my_wc_t *wc)
|
|
|
|
{
|
|
|
|
if (*wc <= uni_plane->maxchar)
|
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const uint16 *page;
|
|
|
|
if ((page= uni_plane->simple_weight[*wc >> 8]))
|
|
|
|
*wc= page[*wc & 0xFF];
|
2023-04-18 07:40:41 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-04-18 04:44:03 +02:00
|
|
|
static inline void
|
2023-02-24 16:22:32 +01:00
|
|
|
my_tolower_unicode_bmp(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
|
2023-04-18 04:44:03 +02:00
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const MY_CASEFOLD_CHARACTER *page;
|
2023-04-18 04:44:03 +02:00
|
|
|
DBUG_ASSERT(*wc <= uni_plane->maxchar);
|
|
|
|
if ((page= uni_plane->page[*wc >> 8]))
|
|
|
|
*wc= page[*wc & 0xFF].tolower;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void
|
2023-02-24 16:22:32 +01:00
|
|
|
my_toupper_unicode_bmp(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
|
2023-04-18 04:44:03 +02:00
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const MY_CASEFOLD_CHARACTER *page;
|
2023-04-18 04:44:03 +02:00
|
|
|
DBUG_ASSERT(*wc <= uni_plane->maxchar);
|
|
|
|
if ((page= uni_plane->page[*wc >> 8]))
|
|
|
|
*wc= page[*wc & 0xFF].toupper;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void
|
2023-02-24 16:22:32 +01:00
|
|
|
my_tolower_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
|
2023-04-18 04:44:03 +02:00
|
|
|
{
|
|
|
|
if (*wc <= uni_plane->maxchar)
|
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const MY_CASEFOLD_CHARACTER *page;
|
2023-04-18 04:44:03 +02:00
|
|
|
if ((page= uni_plane->page[(*wc >> 8)]))
|
|
|
|
*wc= page[*wc & 0xFF].tolower;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static inline void
|
2023-02-24 16:22:32 +01:00
|
|
|
my_toupper_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
|
2023-04-18 04:44:03 +02:00
|
|
|
{
|
|
|
|
if (*wc <= uni_plane->maxchar)
|
|
|
|
{
|
2023-02-24 16:22:32 +01:00
|
|
|
const MY_CASEFOLD_CHARACTER *page;
|
2023-04-18 04:44:03 +02:00
|
|
|
if ((page= uni_plane->page[(*wc >> 8)]))
|
|
|
|
*wc= page[*wc & 0xFF].toupper;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
MDEV-33621 Unify duplicate code in my_wildcmp_uca_impl() and my_wildcmp_unicode_impl()
This is a refactoring patch; it does not change the behaviour.
The MTR tests are being added only to cover the LIKE predicate better
(these tests should have been added earlier under the terms of MDEV-9711).
This patch does not need its own specific MTR tests.
Moving the duplicate code into a new shared file ctype-wildcmp.inl
and including it from multiple places, to define the following functions:
- my_wildcmp_uca_impl(), in ctype-uca.c
For utf8mb3, utf8mb4, ucs2, utf16, utf32, using cs->cset->mb_wc().
For UCA based collations.
- my_wildcmp_mb2_or_mb4_general_ci_impl(), in ctype-ucs2.c:
For ucs2, utf16, utf32, using cs->cset->mb_wc().
For general_ci-style collations:
- xxx_general_ci
- xxx_general_mysql500_ci
- xxx_general_nopad_ci
- my_wildcmp_mb2_or_mb4_bin_impl(), in ctype-ucs2.c:
For ucs2, utf16, utf32, using cs->cset->mb_wc().
For _bin collations:
- xxx_bin
- xxx_nopad_bin
- my_wildcmp_utf8mb3_general_ci_impl(), in ctype-utf8.c
Optimized for utf8mb3, using my_mb_wc_utf8mb3_quick().
For general_ci-style collations:
- utf8mb3_general_ci
- utf8mb3_general_mysql500_ci
- utf8mb3_general_nopad_ci
- my_wildcmp_utf8mb4_general_ci_impl(), in ctype-utf8.c
Optimized for utf8mb4, using my_mb_wc_utf8mb4_quick().
For general_ci-style collations:
- utf8mb4_general_ci
- utf8mb4_general_nopad_ci
2024-03-07 09:00:36 +01:00
|
|
|
/*
|
|
|
|
Compare two characters for equality, according to the collation.
|
|
|
|
For simple Unicode AI CI collations, e.g. utf8mb4_general_ci.
|
|
|
|
|
|
|
|
@return TRUE if the two characters are equal
|
|
|
|
@return FALSE otherwise
|
|
|
|
*/
|
|
|
|
static inline my_bool
|
|
|
|
my_casefold_char_eq_general_ci(MY_CASEFOLD_INFO *casefold,
|
|
|
|
my_wc_t wc1, my_wc_t wc2)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(casefold->simple_weight);
|
|
|
|
my_tosort_unicode(casefold, &wc1);
|
|
|
|
my_tosort_unicode(casefold, &wc2);
|
|
|
|
return wc1 == wc2;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-04-26 13:27:01 +02:00
|
|
|
/*
|
|
|
|
Compare two characters for equality, according to the collation.
|
|
|
|
For simple Uncode AS CI collations, e.g. utf8mb4_general1400_as_ci.
|
|
|
|
@return TRUE if the two characters are equal
|
|
|
|
@return FALSE otherwise
|
|
|
|
*/
|
|
|
|
static inline my_bool
|
|
|
|
my_casefold_char_eq_general_as_ci(MY_CASEFOLD_INFO *casefold,
|
|
|
|
my_wc_t wc1, my_wc_t wc2)
|
|
|
|
{
|
|
|
|
my_toupper_unicode(casefold, &wc1);
|
|
|
|
my_toupper_unicode(casefold, &wc2);
|
|
|
|
return wc1 == wc2;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2023-02-24 16:22:32 +01:00
|
|
|
/*
  Case folding descriptors, defined elsewhere.
  The inline helpers in this header read the maxchar, page and
  simple_weight members of a MY_CASEFOLD_INFO.

  NOTE(review): judging by the names, these are the default, Turkish,
  MySQL-5.0.0-compatible, Unicode 5.2.0 and Unicode 14.0.0 (plain and
  Turkish) variants - confirm against the definitions.
*/
extern MY_CASEFOLD_INFO my_casefold_default;
|
|
|
|
extern MY_CASEFOLD_INFO my_casefold_turkish;
|
|
|
|
extern MY_CASEFOLD_INFO my_casefold_mysql500;
|
|
|
|
extern MY_CASEFOLD_INFO my_casefold_unicode520;
|
2023-03-02 14:37:36 +01:00
|
|
|
extern MY_CASEFOLD_INFO my_casefold_unicode1400;
|
|
|
|
extern MY_CASEFOLD_INFO my_casefold_unicode1400tr;
|
2023-02-24 16:22:32 +01:00
|
|
|
|
|
|
|
|
2024-08-17 10:56:28 +02:00
|
|
|
/*
  Declaration only; the definition is not in this header.

  NOTE(review): judging by the name, this presumably pads the buffer
  [str, strend) with at most "nweights" 2-byte big-endian weights -
  confirm against the definition.
*/
my_strnxfrm_pad_ret_t
|
|
|
|
my_strxfrm_pad_nweights_unicode_be2(uchar *str, uchar *strend,
|
|
|
|
size_t nweights);
|
|
|
|
|
2023-04-26 13:27:01 +02:00
|
|
|
/*
  Declaration only; the definition is not in this header.

  NOTE(review): judging by the name, this presumably pads the whole buffer
  [str, strend) with 2-byte big-endian weights and returns a byte count -
  confirm against the definition.
*/
size_t my_strxfrm_pad_unicode_be2(uchar *str, uchar *strend);
|
|
|
|
|
2024-08-17 10:56:28 +02:00
|
|
|
/*
  Declaration only; the definition is not in this header.

  NOTE(review): judging by the name, this presumably pads the buffer
  [str, strend) with at most "nweights" 3-byte big-endian weights -
  confirm against the definition.
*/
my_strnxfrm_pad_ret_t
|
|
|
|
my_strxfrm_pad_nweights_unicode_be3(uchar *str, uchar *strend,
|
|
|
|
size_t nweights);
|
|
|
|
|
2023-04-26 13:27:01 +02:00
|
|
|
/*
  Declaration only; the definition is not in this header.

  NOTE(review): judging by the name, this presumably pads the whole buffer
  [str, strend) with 3-byte big-endian weights and returns a byte count -
  confirm against the definition.
*/
size_t my_strxfrm_pad_unicode_be3(uchar *str, uchar *strend);
|
2018-10-19 12:20:31 +02:00
|
|
|
|
|
|
|
|
|
|
|
/*
  Store the two low bytes of "wc" at "dst" in big-endian order.

  The caller must guarantee room for at least one byte: the high byte
  is written unconditionally, the low byte only while dst < de.
  dst is advanced past every byte written.

  @param dst  Write position; must be a modifiable uchar pointer lvalue.
  @param de   End of the destination buffer.
  @param wc   The value to store.

  All arguments are parenthesized in the expansion, so an expression
  argument (e.g. "a | b") is shifted and masked as a whole.
  Arguments can be evaluated more than once: avoid side effects in them.
*/
#define PUT_WC_BE2_HAVE_1BYTE(dst, de, wc) \
  do { *(dst)++= (uchar) ((wc) >> 8); \
       if ((dst) < (de)) *(dst)++= (uchar) ((wc) & 0xFF); \
  } while(0)
|
|
|
|
|
2023-04-26 13:27:01 +02:00
|
|
|
/*
  Store the three low bytes of "wc" at "dst" in big-endian order.

  The caller must guarantee room for at least one byte: the highest byte
  is written unconditionally, each following byte only while dst < de.
  dst is advanced past every byte written.

  @param dst  Write position; must be a modifiable uchar pointer lvalue.
  @param de   End of the destination buffer.
  @param wc   The value to store.

  All arguments are parenthesized in the expansion, so an expression
  argument (e.g. "a | b") is shifted and masked as a whole.
  Arguments can be evaluated more than once: avoid side effects in them.
*/
#define PUT_WC_BE3_HAVE_1BYTE(dst, de, wc) \
  do { *(dst)++= (uchar) ((wc) >> 16); \
       if ((dst) < (de)) *(dst)++= (uchar) (((wc) >> 8) & 0xFF); \
       if ((dst) < (de)) *(dst)++= (uchar) ((wc) & 0xFF); \
  } while(0)
|
|
|
|
|
2018-10-19 12:20:31 +02:00
|
|
|
#endif /* CTYPE_UNIDATA_H_INCLUDED */
|