mariadb/strings/ctype-unidata.h
Alexander Barkov 0d17c540a5 MDEV-27277 Add a warning when max_sort_length is reached
Step#1: fixing the return type of strnxfrm() from size_t to this structure:

typedef struct
{
  size_t m_output_length;
  size_t m_source_length_used;
  uint m_warnings;
} my_strnxfrm_ret_t;
2024-10-22 21:42:53 +07:00

199 lines
5.5 KiB
C

#ifndef CTYPE_UNIDATA_H_INCLUDED
#define CTYPE_UNIDATA_H_INCLUDED
/*
Copyright (c) 2018, 2023 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
extern const uint16 weight_general_ci_page00[256];
extern const uint16 *weight_general_ci_index[256];
extern const uint16 weight_general_mysql500_ci_page00[256];
extern const uint16 *weight_general_mysql500_ci_index[256];
extern const MY_CASEFOLD_CHARACTER u300_casefold_page00[256];
static inline my_wc_t my_u300_tolower_7bit(uchar ch)
{
return u300_casefold_page00[ch].tolower;
}
static inline my_wc_t my_u300_toupper_7bit(uchar ch)
{
return u300_casefold_page00[ch].toupper;
}
static inline my_wc_t my_general_ci_bmp_char_to_weight(my_wc_t wc)
{
const uint16 *page;
DBUG_ASSERT((wc >> 8) < array_elements(weight_general_ci_index));
page= weight_general_ci_index[wc >> 8];
return page ? page[wc & 0xFF] : wc;
}
static inline my_wc_t my_general_ci_char_to_weight(my_wc_t wc)
{
if ((wc >> 8) < array_elements(weight_general_ci_index))
return my_general_ci_bmp_char_to_weight(wc);
return MY_CS_REPLACEMENT_CHARACTER;
}
static inline my_wc_t my_general_mysql500_ci_bmp_char_to_weight(my_wc_t wc)
{
const uint16 *page;
DBUG_ASSERT((wc >> 8) < array_elements(weight_general_mysql500_ci_index));
page= weight_general_mysql500_ci_index[wc >> 8];
return page ? page[wc & 0xFF] : wc;
}
static inline void my_tosort_unicode_bmp(MY_CASEFOLD_INFO *uni_plane,
my_wc_t *wc)
{
const uint16 *page;
DBUG_ASSERT(*wc <= uni_plane->maxchar);
if ((page= uni_plane->simple_weight[*wc >> 8]))
*wc= page[*wc & 0xFF];
}
static inline void my_tosort_unicode(MY_CASEFOLD_INFO *uni_plane,
my_wc_t *wc)
{
if (*wc <= uni_plane->maxchar)
{
const uint16 *page;
if ((page= uni_plane->simple_weight[*wc >> 8]))
*wc= page[*wc & 0xFF];
}
else
{
*wc= MY_CS_REPLACEMENT_CHARACTER;
}
}
static inline void
my_tolower_unicode_bmp(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
{
const MY_CASEFOLD_CHARACTER *page;
DBUG_ASSERT(*wc <= uni_plane->maxchar);
if ((page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].tolower;
}
static inline void
my_toupper_unicode_bmp(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
{
const MY_CASEFOLD_CHARACTER *page;
DBUG_ASSERT(*wc <= uni_plane->maxchar);
if ((page= uni_plane->page[*wc >> 8]))
*wc= page[*wc & 0xFF].toupper;
}
static inline void
my_tolower_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
{
if (*wc <= uni_plane->maxchar)
{
const MY_CASEFOLD_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8)]))
*wc= page[*wc & 0xFF].tolower;
}
}
static inline void
my_toupper_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
{
if (*wc <= uni_plane->maxchar)
{
const MY_CASEFOLD_CHARACTER *page;
if ((page= uni_plane->page[(*wc >> 8)]))
*wc= page[*wc & 0xFF].toupper;
}
}
/*
Compare two characters for equality, according to the collation.
For simple Unicode AI CI collations, e.g. utf8mb4_general_ci.
@return TRUE if the two characters are equal
@return FALSE otherwise
*/
static inline my_bool
my_casefold_char_eq_general_ci(MY_CASEFOLD_INFO *casefold,
my_wc_t wc1, my_wc_t wc2)
{
DBUG_ASSERT(casefold->simple_weight);
my_tosort_unicode(casefold, &wc1);
my_tosort_unicode(casefold, &wc2);
return wc1 == wc2;
}
/*
Compare two characters for equality, according to the collation.
For simple Uncode AS CI collations, e.g. utf8mb4_general1400_as_ci.
@return TRUE if the two characters are equal
@return FALSE otherwise
*/
static inline my_bool
my_casefold_char_eq_general_as_ci(MY_CASEFOLD_INFO *casefold,
my_wc_t wc1, my_wc_t wc2)
{
my_toupper_unicode(casefold, &wc1);
my_toupper_unicode(casefold, &wc2);
return wc1 == wc2;
}
extern MY_CASEFOLD_INFO my_casefold_default;
extern MY_CASEFOLD_INFO my_casefold_turkish;
extern MY_CASEFOLD_INFO my_casefold_mysql500;
extern MY_CASEFOLD_INFO my_casefold_unicode520;
extern MY_CASEFOLD_INFO my_casefold_unicode1400;
extern MY_CASEFOLD_INFO my_casefold_unicode1400tr;
my_strnxfrm_pad_ret_t
my_strxfrm_pad_nweights_unicode_be2(uchar *str, uchar *strend,
size_t nweights);
size_t my_strxfrm_pad_unicode_be2(uchar *str, uchar *strend);
my_strnxfrm_pad_ret_t
my_strxfrm_pad_nweights_unicode_be3(uchar *str, uchar *strend,
size_t nweights);
size_t my_strxfrm_pad_unicode_be3(uchar *str, uchar *strend);
#define PUT_WC_BE2_HAVE_1BYTE(dst, de, wc) \
do { *dst++= (uchar) (wc >> 8); if (dst < de) *dst++= (uchar) (wc & 0xFF); } while(0)
#define PUT_WC_BE3_HAVE_1BYTE(dst, de, wc) \
do { *dst++= (uchar) (wc >> 16); \
if (dst < de) *dst++= (uchar) ((wc >> 8) & 0xFF);\
if (dst < de) *dst++= (uchar) (wc & 0xFF);\
} while(0)
#endif /* CTYPE_UNIDATA_H_INCLUDED */