2015-03-04 09:16:43 +04:00
|
|
|
/*
|
|
|
|
Copyright (c) 2015, MariaDB Foundation
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef MY_FUNCTION_NAME
|
|
|
|
#error MY_FUNCTION_NAME is not defined
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(IS_MB3_CHAR) && !defined(IS_MB2_CHAR)
|
|
|
|
#error IS_MB3_CHAR is defined, while IS_MB2_CHAR is not!
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(IS_MB4_CHAR) && !defined(IS_MB3_CHAR)
|
|
|
|
#error IS_MB4_CHAR is defined, while IS_MB3_CHAR is not!
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2015-03-13 16:51:36 +04:00
|
|
|
#ifdef DEFINE_ASIAN_ROUTINES
|
|
|
|
#define DEFINE_WELL_FORMED_LEN
|
|
|
|
#define DEFINE_WELL_FORMED_CHAR_LENGTH
|
|
|
|
#define DEFINE_CHARLEN
|
2015-08-14 18:34:41 +04:00
|
|
|
#define DEFINE_NATIVE_TO_MB_VARLEN
|
2015-03-13 16:51:36 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_CHARLEN
|
|
|
|
/**
|
|
|
|
Returns length of the left-most character of a string.
|
|
|
|
@param cs - charset with mbminlen==1 and mbmaxlen<=4
|
|
|
|
@param b - the beginning of the string
|
|
|
|
@param e - the end of the string
|
|
|
|
|
|
|
|
@return MY_CS_ILSEQ if a bad byte sequence was found
|
|
|
|
@return MY_CS_TOOSMALL(N) if the string ended unexpectedly
|
|
|
|
@return >0 if a valid character was found
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const uchar *b, const uchar *e)
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(cs->mbminlen == 1);
|
|
|
|
DBUG_ASSERT(cs->mbmaxlen <= 4);
|
|
|
|
|
|
|
|
if (b >= e)
|
|
|
|
return MY_CS_TOOSMALL;
|
|
|
|
if ((uchar) b[0] < 128)
|
|
|
|
return 1; /* Single byte ASCII character */
|
|
|
|
|
|
|
|
#ifdef IS_8BIT_CHAR
|
|
|
|
if (IS_8BIT_CHAR(b[0]))
|
|
|
|
{
|
|
|
|
/* Single byte non-ASCII character, e.g. half width kana in sjis */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (b + 2 > e)
|
|
|
|
return MY_CS_TOOSMALLN(2);
|
|
|
|
if (IS_MB2_CHAR(b[0], b[1]))
|
|
|
|
return 2; /* Double byte character */
|
|
|
|
|
|
|
|
#ifdef IS_MB3_CHAR
|
|
|
|
if (b + 3 > e)
|
|
|
|
return MY_CS_TOOSMALLN(3);
|
|
|
|
if (IS_MB3_CHAR(b[0], b[1], b[2]))
|
|
|
|
return 3; /* Three-byte character */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef IS_MB4_CHAR
|
|
|
|
if (b + 4 > e)
|
|
|
|
return MY_CS_TOOSMALLN(4);
|
|
|
|
if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
|
|
|
return 4; /* Four-byte character */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Wrong byte sequence */
|
|
|
|
return MY_CS_ILSEQ;
|
|
|
|
}
|
|
|
|
#endif /* DEFINE_WELL_FORMED_LEN */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_WELL_FORMED_LEN
|
2015-03-04 09:16:43 +04:00
|
|
|
/**
|
|
|
|
Returns well formed length of a character string with
|
|
|
|
variable character length for character sets with:
|
|
|
|
- mbminlen == 1
|
|
|
|
- mbmaxlen == 2, 3, or 4
|
|
|
|
*/
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const char *b, const char *e,
|
|
|
|
size_t nchars, int *error)
|
|
|
|
{
|
|
|
|
const char *b0= b;
|
|
|
|
|
|
|
|
DBUG_ASSERT(cs->mbminlen == 1);
|
|
|
|
DBUG_ASSERT(cs->mbmaxlen <= 4);
|
|
|
|
|
|
|
|
for (*error= 0 ; b < e && nchars-- ; )
|
|
|
|
{
|
|
|
|
if ((uchar) b[0] < 128)
|
|
|
|
{
|
|
|
|
b++; /* Single byte ASCII character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
|
|
|
|
{
|
|
|
|
b+= 2; /* Double byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef IS_MB3_CHAR
|
|
|
|
if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
|
|
|
|
{
|
|
|
|
b+= 3; /* Three-byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef IS_MB4_CHAR
|
|
|
|
if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
|
|
|
{
|
|
|
|
b+= 4; /* Four-byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef IS_8BIT_CHAR
|
|
|
|
if (IS_8BIT_CHAR(b[0]))
|
|
|
|
{
|
|
|
|
b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Wrong byte sequence */
|
|
|
|
*error= 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return b - b0;
|
|
|
|
}
|
|
|
|
|
2015-03-13 16:51:36 +04:00
|
|
|
#endif /* DEFINE_WELL_FORMED_LEN */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
|
|
|
|
/**
|
|
|
|
Returns well formed length of a string
|
|
|
|
measured in characters (rather than in bytes).
|
|
|
|
Version for character sets that define IS_MB?_CHAR(), e.g. big5.
|
|
|
|
*/
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const char *b, const char *e,
|
|
|
|
size_t nchars,
|
|
|
|
MY_STRCOPY_STATUS *status)
|
|
|
|
{
|
|
|
|
size_t nchars0= nchars;
|
|
|
|
for ( ; b < e && nchars ; nchars--)
|
|
|
|
{
|
|
|
|
if ((uchar) b[0] < 128)
|
|
|
|
{
|
|
|
|
b++; /* Single byte ASCII character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
|
|
|
|
{
|
|
|
|
b+= 2; /* Double byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef IS_MB3_CHAR
|
|
|
|
if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
|
|
|
|
{
|
|
|
|
b+= 3; /* Three-byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef IS_MB4_CHAR
|
|
|
|
if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
|
|
|
{
|
|
|
|
b+= 4; /* Four-byte character */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef IS_8BIT_CHAR
|
|
|
|
if (IS_8BIT_CHAR(b[0]))
|
|
|
|
{
|
|
|
|
b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Wrong byte sequence */
|
|
|
|
status->m_source_end_pos= status->m_well_formed_error_pos= b;
|
|
|
|
return nchars0 - nchars;
|
|
|
|
}
|
|
|
|
status->m_source_end_pos= b;
|
|
|
|
status->m_well_formed_error_pos= NULL;
|
|
|
|
return nchars0 - nchars;
|
|
|
|
}
|
|
|
|
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
|
|
|
#ifndef CHARLEN
|
|
|
|
#error CHARLEN is not defined
|
|
|
|
#endif
|
|
|
|
/**
|
|
|
|
Returns well formed length of a string
|
|
|
|
measured in characters (rather than in bytes).
|
|
|
|
Version for character sets that define CHARLEN(), e.g. utf8.
|
|
|
|
CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does:
|
|
|
|
- a positive number in the range [1-mbmaxlen] if a valid
|
|
|
|
single-byte or multi-byte character was found
|
|
|
|
- MY_CS_ILSEQ (0) on a bad byte sequence
|
|
|
|
- MY_CS_TOOSMALLxx if the incoming sequence is incomplete
|
|
|
|
*/
|
|
|
|
static size_t
|
|
|
|
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
const char *b, const char *e,
|
|
|
|
size_t nchars,
|
|
|
|
MY_STRCOPY_STATUS *status)
|
|
|
|
{
|
|
|
|
size_t nchars0= nchars;
|
|
|
|
int chlen;
|
|
|
|
for ( ; nchars ; nchars--, b+= chlen)
|
|
|
|
{
|
|
|
|
if ((chlen= CHARLEN(cs, (uchar*) b, (uchar*) e)) <= 0)
|
|
|
|
{
|
|
|
|
status->m_well_formed_error_pos= b < e ? b : NULL;
|
|
|
|
status->m_source_end_pos= b;
|
|
|
|
return nchars0 - nchars;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
status->m_well_formed_error_pos= NULL;
|
|
|
|
status->m_source_end_pos= b;
|
|
|
|
return nchars0 - nchars;
|
|
|
|
}
|
|
|
|
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
|
2015-06-26 13:40:28 +04:00
|
|
|
|
2015-08-14 18:34:41 +04:00
|
|
|
|
|
|
|
#ifdef DEFINE_NATIVE_TO_MB_VARLEN
|
|
|
|
/*
|
|
|
|
Write a native 2-byte character.
|
|
|
|
If the full character does not fit, only the first byte is written.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e)
|
|
|
|
{
|
|
|
|
/* The caller must insure there is a space for at least one byte */
|
|
|
|
DBUG_ASSERT(s < e);
|
2015-11-24 22:47:42 +04:00
|
|
|
s[0]= (uchar) (wc >> 8);
|
2015-08-14 18:34:41 +04:00
|
|
|
if (s + 2 > e)
|
|
|
|
return MY_CS_TOOSMALL2;
|
|
|
|
s[1]= wc & 0xFF;
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Write a native 3-byte character.
|
|
|
|
If the full character does not fit, only the leading bytes are written.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e)
|
|
|
|
{
|
|
|
|
/* The caller must insure there is a space for at least one byte */
|
|
|
|
DBUG_ASSERT(s < e);
|
2015-11-24 22:47:42 +04:00
|
|
|
s[0]= (uchar) (wc >> 16);
|
2015-08-14 18:34:41 +04:00
|
|
|
if (s + 2 > e)
|
|
|
|
return MY_CS_TOOSMALL2;
|
|
|
|
s[1]= (wc >> 8) & 0xFF;
|
|
|
|
if (s + 3 > e)
|
|
|
|
return MY_CS_TOOSMALL3;
|
|
|
|
s[2]= wc & 0xFF;
|
|
|
|
return 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Write a native 1-byte or 2-byte or 3-byte character.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
my_wc_t wc, uchar *s, uchar *e)
|
|
|
|
{
|
|
|
|
if (s >= e)
|
|
|
|
return MY_CS_TOOSMALL;
|
|
|
|
if ((int) wc <= 0xFF)
|
|
|
|
{
|
|
|
|
s[0]= (uchar) wc;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#ifdef IS_MB3_HEAD
|
|
|
|
if (wc > 0xFFFF)
|
|
|
|
return my_native_to_mb_fixed3(wc, s, e);
|
|
|
|
#endif
|
|
|
|
return my_native_to_mb_fixed2(wc, s, e);
|
|
|
|
}
|
|
|
|
#endif /* DEFINE_NATIVE_TO_MB_VARLEN */
|
|
|
|
|
|
|
|
|
2015-06-26 13:40:28 +04:00
|
|
|
#undef MY_FUNCTION_NAME
|