mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
a206658b98
This change removed 68 explicit strlen() calls from the code. The following renames were done to ensure we don't use the old names when merging code from earlier releases, as using the new variables for print functions could result in crashes: - charset->csname renamed to charset->cs_name - charset->name renamed to charset->coll_name Almost everything was a mechanical change, except: - Changed to use the new Protocol::store(LEX_CSTRING..) when possible - Changed to use field->store(LEX_CSTRING*, CHARSET_INFO*) when possible - Changed to use String->append(LEX_CSTRING&) when possible Other things: - There were compiler issues with ensuring that all character set names point to the same string: gcc doesn't allow one to use integer constants when defining global structures (constant char * pointers work fine). To get around this, I declared defines for each character set name length.
3524 lines
97 KiB
C
3524 lines
97 KiB
C
/* Copyright (c) 2003, 2013, Oracle and/or its affiliates
|
|
Copyright (c) 2009, 2020, MariaDB
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Library General Public
|
|
License as published by the Free Software Foundation; version 2
|
|
of the License.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Library General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Library General Public
|
|
License along with this library; if not, write to the Free
|
|
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
|
|
MA 02110-1335 USA */
|
|
|
|
/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
|
|
|
|
#include "strings_def.h"
|
|
#include <m_ctype.h>
|
|
#include <my_sys.h>
|
|
#include <stdarg.h>
|
|
|
|
#include "ctype-unidata.h"
|
|
|
|
|
|
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
|
|
#define HAVE_CHARSET_mb2
|
|
#endif
|
|
|
|
|
|
#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
|
|
#define HAVE_CHARSET_mb2_or_mb4
|
|
#endif
|
|
|
|
#ifndef EILSEQ
|
|
#define EILSEQ ENOENT
|
|
#endif
|
|
|
|
#undef ULONGLONG_MAX
|
|
#define ULONGLONG_MAX (~(ulonglong) 0)
|
|
#define MAX_NEGATIVE_NUMBER ((ulonglong) 0x8000000000000000LL)
|
|
#define INIT_CNT 9
|
|
#define LFACTOR 1000000000ULL
|
|
#define LFACTOR1 10000000000ULL
|
|
#define LFACTOR2 100000000000ULL
|
|
|
|
#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2)
|
|
/* Powers of ten: lfactor[n] == 10^n for n in 0..8 */
static unsigned long lfactor[9]=
{
  1L,
  10L,
  100L,
  1000L,
  10000L,
  100000L,
  1000000L,
  10000000L,
  100000000L
};
|
|
#endif
|
|
|
|
|
|
#ifdef HAVE_CHARSET_mb2_or_mb4
|
|
static size_t
|
|
my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)),
|
|
char * s __attribute__((unused)))
|
|
{
|
|
DBUG_ASSERT(0);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
|
|
char * s __attribute__((unused)))
|
|
{
|
|
DBUG_ASSERT(0);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *s __attribute__((unused)),
|
|
const char *t __attribute__((unused)))
|
|
{
|
|
DBUG_ASSERT(0);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Outcome of copying one (possibly incomplete) character */
typedef enum
{
  /* The character was valid as it is */
  MY_CHAR_COPY_OK= 0,
  /* The character was not valid and could not be repaired */
  MY_CHAR_COPY_ERROR= 1,
  /* The character was not valid and was replaced with '?' */
  MY_CHAR_COPY_FIXED= 2
} my_char_copy_status_t;
|
|
|
|
|
|
/*
|
|
Copies an incomplete character, lef-padding it with 0x00 bytes.
|
|
|
|
@param cs Character set
|
|
@param dst The destination string
|
|
@param dst_length Space available in dst
|
|
@param src The source string
|
|
@param src_length Length of src
|
|
@param nchars Copy not more than nchars characters.
|
|
The "nchars" parameter of the caller.
|
|
Only 0 and non-0 are important here.
|
|
@param fix What to do if after zero-padding didn't get a valid
|
|
character:
|
|
- FALSE - exit with error.
|
|
- TRUE - try to put '?' instead.
|
|
|
|
@return MY_CHAR_COPY_OK if after zero-padding got a valid character.
|
|
cs->mbmaxlen bytes were written to "dst".
|
|
@return MY_CHAR_COPY_FIXED if after zero-padding did not get a valid
|
|
character, but wrote '?' to the destination
|
|
string instead.
|
|
cs->mbminlen bytes were written to "dst".
|
|
@return MY_CHAR_COPY_ERROR If failed and nothing was written to "dst".
|
|
Possible reasons:
|
|
- dst_length was too short
|
|
- nchars was 0
|
|
- the character after padding appeared not
|
|
to be valid, and could not fix it to '?'.
|
|
*/
|
|
static my_char_copy_status_t
|
|
my_copy_incomplete_char(CHARSET_INFO *cs,
|
|
char *dst, size_t dst_length,
|
|
const char *src, size_t src_length,
|
|
size_t nchars, my_bool fix)
|
|
{
|
|
size_t pad_length;
|
|
size_t src_offset= src_length % cs->mbminlen;
|
|
if (dst_length < cs->mbminlen || !nchars)
|
|
return MY_CHAR_COPY_ERROR;
|
|
|
|
pad_length= cs->mbminlen - src_offset;
|
|
bzero(dst, pad_length);
|
|
memmove(dst + pad_length, src, src_offset);
|
|
/*
|
|
In some cases left zero-padding can create an incorrect character.
|
|
For example:
|
|
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
|
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
|
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
|
|
|
Make sure we didn't pad to an incorrect character.
|
|
*/
|
|
if (my_ci_charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
|
|
(int) cs->mbminlen)
|
|
return MY_CHAR_COPY_OK;
|
|
|
|
if (fix &&
|
|
my_ci_wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
|
|
(int) cs->mbminlen)
|
|
return MY_CHAR_COPY_FIXED;
|
|
|
|
return MY_CHAR_COPY_ERROR;
|
|
}
|
|
|
|
|
|
/*
|
|
Copy an UCS2/UTF16/UTF32 string, fix bad characters.
|
|
*/
|
|
static size_t
|
|
my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
|
|
char *dst, size_t dst_length,
|
|
const char *src, size_t src_length,
|
|
size_t nchars, MY_STRCOPY_STATUS *status)
|
|
{
|
|
size_t length2, src_offset= src_length % cs->mbminlen;
|
|
my_char_copy_status_t padstatus;
|
|
|
|
if (!src_offset)
|
|
return my_copy_fix_mb(cs, dst, dst_length,
|
|
src, src_length, nchars, status);
|
|
if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
|
|
src, src_length, nchars, TRUE)) ==
|
|
MY_CHAR_COPY_ERROR)
|
|
{
|
|
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
|
return 0;
|
|
}
|
|
length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
|
|
src + src_offset, src_length - src_offset,
|
|
nchars - 1, status);
|
|
if (padstatus == MY_CHAR_COPY_FIXED)
|
|
status->m_well_formed_error_pos= src;
|
|
return cs->mbminlen /* The left-padded character */ + length2;
|
|
}
|
|
|
|
|
|
static long
|
|
my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
|
|
const char *nptr, size_t l, int base,
|
|
char **endptr, int *err)
|
|
{
|
|
int negative= 0;
|
|
int overflow;
|
|
int cnv;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
register unsigned int cutlim;
|
|
register uint32 cutoff;
|
|
register uint32 res;
|
|
register const uchar *s= (const uchar*) nptr;
|
|
register const uchar *e= (const uchar*) nptr+l;
|
|
const uchar *save;
|
|
|
|
*err= 0;
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
switch (wc)
|
|
{
|
|
case ' ' : break;
|
|
case '\t': break;
|
|
case '-' : negative= !negative; break;
|
|
case '+' : break;
|
|
default : goto bs;
|
|
}
|
|
}
|
|
else /* No more characters or bad multibyte sequence */
|
|
{
|
|
if (endptr != NULL )
|
|
*endptr= (char*) s;
|
|
err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
|
|
return 0;
|
|
}
|
|
s+= cnv;
|
|
} while (1);
|
|
|
|
bs:
|
|
|
|
overflow= 0;
|
|
res= 0;
|
|
save= s;
|
|
cutoff= ((uint32)~0L) / (uint32) base;
|
|
cutlim= (uint) (((uint32)~0L) % (uint32) base);
|
|
|
|
do {
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
s+= cnv;
|
|
if (wc >= '0' && wc <= '9')
|
|
wc-= '0';
|
|
else if (wc >= 'A' && wc <= 'Z')
|
|
wc= wc - 'A' + 10;
|
|
else if (wc >= 'a' && wc <= 'z')
|
|
wc= wc - 'a' + 10;
|
|
else
|
|
break;
|
|
if ((int)wc >= base)
|
|
break;
|
|
if (res > cutoff || (res == cutoff && wc > cutlim))
|
|
overflow= 1;
|
|
else
|
|
{
|
|
res*= (uint32) base;
|
|
res+= wc;
|
|
}
|
|
}
|
|
else if (cnv == MY_CS_ILSEQ)
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr = (char*) s;
|
|
err[0]= EILSEQ;
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
/* No more characters */
|
|
break;
|
|
}
|
|
} while(1);
|
|
|
|
if (endptr != NULL)
|
|
*endptr = (char *) s;
|
|
|
|
if (s == save)
|
|
{
|
|
err[0]= EDOM;
|
|
return 0L;
|
|
}
|
|
|
|
if (negative)
|
|
{
|
|
if (res > (uint32) INT_MIN32)
|
|
overflow= 1;
|
|
}
|
|
else if (res > INT_MAX32)
|
|
overflow= 1;
|
|
|
|
if (overflow)
|
|
{
|
|
err[0]= ERANGE;
|
|
return negative ? INT_MIN32 : INT_MAX32;
|
|
}
|
|
|
|
return (negative ? -((long) res) : (long) res);
|
|
}
|
|
|
|
|
|
static ulong
|
|
my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
|
|
const char *nptr, size_t l, int base,
|
|
char **endptr, int *err)
|
|
{
|
|
int negative= 0;
|
|
int overflow;
|
|
int cnv;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
register unsigned int cutlim;
|
|
register uint32 cutoff;
|
|
register uint32 res;
|
|
register const uchar *s= (const uchar*) nptr;
|
|
register const uchar *e= (const uchar*) nptr + l;
|
|
const uchar *save;
|
|
|
|
*err= 0;
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
switch (wc)
|
|
{
|
|
case ' ' : break;
|
|
case '\t': break;
|
|
case '-' : negative= !negative; break;
|
|
case '+' : break;
|
|
default : goto bs;
|
|
}
|
|
}
|
|
else /* No more characters or bad multibyte sequence */
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr= (char*)s;
|
|
err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
|
|
return 0;
|
|
}
|
|
s+= cnv;
|
|
} while (1);
|
|
|
|
bs:
|
|
|
|
overflow= 0;
|
|
res= 0;
|
|
save= s;
|
|
cutoff= ((uint32)~0L) / (uint32) base;
|
|
cutlim= (uint) (((uint32)~0L) % (uint32) base);
|
|
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
s+= cnv;
|
|
if (wc >= '0' && wc <= '9')
|
|
wc-= '0';
|
|
else if (wc >= 'A' && wc <= 'Z')
|
|
wc= wc - 'A' + 10;
|
|
else if (wc >= 'a' && wc <= 'z')
|
|
wc= wc - 'a' + 10;
|
|
else
|
|
break;
|
|
if ((int) wc >= base)
|
|
break;
|
|
if (res > cutoff || (res == cutoff && wc > cutlim))
|
|
overflow = 1;
|
|
else
|
|
{
|
|
res*= (uint32) base;
|
|
res+= wc;
|
|
}
|
|
}
|
|
else if (cnv == MY_CS_ILSEQ)
|
|
{
|
|
if (endptr != NULL )
|
|
*endptr= (char*)s;
|
|
err[0]= EILSEQ;
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
/* No more characters */
|
|
break;
|
|
}
|
|
} while(1);
|
|
|
|
if (endptr != NULL)
|
|
*endptr= (char *) s;
|
|
|
|
if (s == save)
|
|
{
|
|
err[0]= EDOM;
|
|
return 0L;
|
|
}
|
|
|
|
if (overflow)
|
|
{
|
|
err[0]= (ERANGE);
|
|
return (~(uint32) 0);
|
|
}
|
|
|
|
return (negative ? -((long) res) : (long) res);
|
|
}
|
|
|
|
|
|
static longlong
|
|
my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
|
|
const char *nptr, size_t l, int base,
|
|
char **endptr, int *err)
|
|
{
|
|
int negative=0;
|
|
int overflow;
|
|
int cnv;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
register ulonglong cutoff;
|
|
register unsigned int cutlim;
|
|
register ulonglong res;
|
|
register const uchar *s= (const uchar*) nptr;
|
|
register const uchar *e= (const uchar*) nptr+l;
|
|
const uchar *save;
|
|
|
|
*err= 0;
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
switch (wc)
|
|
{
|
|
case ' ' : break;
|
|
case '\t': break;
|
|
case '-' : negative= !negative; break;
|
|
case '+' : break;
|
|
default : goto bs;
|
|
}
|
|
}
|
|
else /* No more characters or bad multibyte sequence */
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr = (char*)s;
|
|
err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
|
|
return 0;
|
|
}
|
|
s+=cnv;
|
|
} while (1);
|
|
|
|
bs:
|
|
|
|
overflow = 0;
|
|
res = 0;
|
|
save = s;
|
|
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
|
|
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
|
|
|
|
do {
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
s+=cnv;
|
|
if ( wc>='0' && wc<='9')
|
|
wc -= '0';
|
|
else if ( wc>='A' && wc<='Z')
|
|
wc = wc - 'A' + 10;
|
|
else if ( wc>='a' && wc<='z')
|
|
wc = wc - 'a' + 10;
|
|
else
|
|
break;
|
|
if ((int)wc >= base)
|
|
break;
|
|
if (res > cutoff || (res == cutoff && wc > cutlim))
|
|
overflow = 1;
|
|
else
|
|
{
|
|
res *= (ulonglong) base;
|
|
res += wc;
|
|
}
|
|
}
|
|
else if (cnv==MY_CS_ILSEQ)
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr = (char*)s;
|
|
err[0]=EILSEQ;
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
/* No more characters */
|
|
break;
|
|
}
|
|
} while(1);
|
|
|
|
if (endptr != NULL)
|
|
*endptr = (char *) s;
|
|
|
|
if (s == save)
|
|
{
|
|
err[0]=EDOM;
|
|
return 0L;
|
|
}
|
|
|
|
if (negative)
|
|
{
|
|
if (res > (ulonglong) LONGLONG_MIN)
|
|
overflow = 1;
|
|
}
|
|
else if (res > (ulonglong) LONGLONG_MAX)
|
|
overflow = 1;
|
|
|
|
if (overflow)
|
|
{
|
|
err[0]=ERANGE;
|
|
return negative ? LONGLONG_MIN : LONGLONG_MAX;
|
|
}
|
|
|
|
return (negative ? -((longlong)res) : (longlong)res);
|
|
}
|
|
|
|
|
|
static ulonglong
|
|
my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
|
|
const char *nptr, size_t l, int base,
|
|
char **endptr, int *err)
|
|
{
|
|
int negative= 0;
|
|
int overflow;
|
|
int cnv;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
register ulonglong cutoff;
|
|
register unsigned int cutlim;
|
|
register ulonglong res;
|
|
register const uchar *s= (const uchar*) nptr;
|
|
register const uchar *e= (const uchar*) nptr + l;
|
|
const uchar *save;
|
|
|
|
*err= 0;
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
switch (wc)
|
|
{
|
|
case ' ' : break;
|
|
case '\t': break;
|
|
case '-' : negative= !negative; break;
|
|
case '+' : break;
|
|
default : goto bs;
|
|
}
|
|
}
|
|
else /* No more characters or bad multibyte sequence */
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr = (char*)s;
|
|
err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
|
|
return 0;
|
|
}
|
|
s+=cnv;
|
|
} while (1);
|
|
|
|
bs:
|
|
|
|
overflow = 0;
|
|
res = 0;
|
|
save = s;
|
|
cutoff = (~(ulonglong) 0) / (unsigned long int) base;
|
|
cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
|
|
|
|
do
|
|
{
|
|
if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
|
|
{
|
|
s+=cnv;
|
|
if ( wc>='0' && wc<='9')
|
|
wc -= '0';
|
|
else if ( wc>='A' && wc<='Z')
|
|
wc = wc - 'A' + 10;
|
|
else if ( wc>='a' && wc<='z')
|
|
wc = wc - 'a' + 10;
|
|
else
|
|
break;
|
|
if ((int)wc >= base)
|
|
break;
|
|
if (res > cutoff || (res == cutoff && wc > cutlim))
|
|
overflow = 1;
|
|
else
|
|
{
|
|
res *= (ulonglong) base;
|
|
res += wc;
|
|
}
|
|
}
|
|
else if (cnv==MY_CS_ILSEQ)
|
|
{
|
|
if (endptr !=NULL )
|
|
*endptr = (char*)s;
|
|
err[0]= EILSEQ;
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
/* No more characters */
|
|
break;
|
|
}
|
|
} while(1);
|
|
|
|
if (endptr != NULL)
|
|
*endptr = (char *) s;
|
|
|
|
if (s == save)
|
|
{
|
|
err[0]= EDOM;
|
|
return 0L;
|
|
}
|
|
|
|
if (overflow)
|
|
{
|
|
err[0]= ERANGE;
|
|
return (~(ulonglong) 0);
|
|
}
|
|
|
|
return (negative ? -((longlong) res) : (longlong) res);
|
|
}
|
|
|
|
|
|
static double
|
|
my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
|
|
char *nptr, size_t length,
|
|
char **endptr, int *err)
|
|
{
|
|
char buf[256];
|
|
double res;
|
|
register char *b= buf;
|
|
register const uchar *s= (const uchar*) nptr;
|
|
const uchar *end;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
int cnv;
|
|
|
|
*err= 0;
|
|
/* Cut too long strings */
|
|
if (length >= sizeof(buf))
|
|
length= sizeof(buf) - 1;
|
|
end= s + length;
|
|
|
|
while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
|
|
{
|
|
s+= cnv;
|
|
if (wc > (int) (uchar) 'e' || !wc)
|
|
break; /* Can't be part of double */
|
|
*b++= (char) wc;
|
|
}
|
|
|
|
*endptr= b;
|
|
res= my_strtod(buf, endptr, err);
|
|
*endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
|
|
return res;
|
|
}
|
|
|
|
|
|
static ulonglong
|
|
my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
|
|
const char *nptr, size_t length,
|
|
int unsign_fl,
|
|
char **endptr, int *err)
|
|
{
|
|
char buf[256], *b= buf;
|
|
ulonglong res;
|
|
const uchar *end, *s= (const uchar*) nptr;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
int cnv;
|
|
|
|
/* Cut too long strings */
|
|
if (length >= sizeof(buf))
|
|
length= sizeof(buf)-1;
|
|
end= s + length;
|
|
|
|
while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
|
|
{
|
|
s+= cnv;
|
|
if (wc > (int) (uchar) 'e' || !wc)
|
|
break; /* Can't be a number part */
|
|
*b++= (char) wc;
|
|
}
|
|
|
|
res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
|
|
*endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
|
|
return res;
|
|
}
|
|
|
|
|
|
/*
|
|
This is a fast version optimized for the case of radix 10 / -10
|
|
*/
|
|
|
|
static size_t
|
|
my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
|
|
char *dst, size_t len, int radix, long int val)
|
|
{
|
|
char buffer[66];
|
|
register char *p, *db, *de;
|
|
long int new_val;
|
|
int sl= 0;
|
|
unsigned long int uval = (unsigned long int) val;
|
|
|
|
p= &buffer[sizeof(buffer) - 1];
|
|
*p= '\0';
|
|
|
|
if (radix < 0)
|
|
{
|
|
if (val < 0)
|
|
{
|
|
sl= 1;
|
|
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
|
|
uval = (unsigned long int)0 - uval;
|
|
}
|
|
}
|
|
|
|
new_val = (long) (uval / 10);
|
|
*--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
|
|
val= new_val;
|
|
|
|
while (val != 0)
|
|
{
|
|
new_val= val / 10;
|
|
*--p= '0' + (char) (val - new_val * 10);
|
|
val= new_val;
|
|
}
|
|
|
|
if (sl)
|
|
{
|
|
*--p= '-';
|
|
}
|
|
|
|
for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
|
|
{
|
|
int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
|
|
if (cnvres > 0)
|
|
dst+= cnvres;
|
|
else
|
|
break;
|
|
}
|
|
return (int) (dst - db);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
|
|
char *dst, size_t len, int radix, longlong val)
|
|
{
|
|
char buffer[65];
|
|
register char *p, *db, *de;
|
|
long long_val;
|
|
int sl= 0;
|
|
ulonglong uval= (ulonglong) val;
|
|
|
|
if (radix < 0)
|
|
{
|
|
if (val < 0)
|
|
{
|
|
sl= 1;
|
|
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
|
|
uval = (ulonglong)0 - uval;
|
|
}
|
|
}
|
|
|
|
p= &buffer[sizeof(buffer)-1];
|
|
*p='\0';
|
|
|
|
if (uval == 0)
|
|
{
|
|
*--p= '0';
|
|
goto cnv;
|
|
}
|
|
|
|
while (uval > (ulonglong) LONG_MAX)
|
|
{
|
|
ulonglong quo= uval/(uint) 10;
|
|
uint rem= (uint) (uval- quo* (uint) 10);
|
|
*--p= '0' + rem;
|
|
uval= quo;
|
|
}
|
|
|
|
long_val= (long) uval;
|
|
while (long_val != 0)
|
|
{
|
|
long quo= long_val/10;
|
|
*--p= (char) ('0' + (long_val - quo*10));
|
|
long_val= quo;
|
|
}
|
|
|
|
cnv:
|
|
if (sl)
|
|
{
|
|
*--p= '-';
|
|
}
|
|
|
|
for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
|
|
{
|
|
int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
|
|
if (cnvres > 0)
|
|
dst+= cnvres;
|
|
else
|
|
break;
|
|
}
|
|
return (int) (dst -db);
|
|
}
|
|
|
|
#endif /* HAVE_CHARSET_mb2_or_mb4 */
|
|
|
|
|
|
#ifdef HAVE_CHARSET_mb2
|
|
/**
|
|
Convert a Unicode code point to a digit.
|
|
@param wc - the input Unicode code point
|
|
@param[OUT] c - the output character representing the digit value 0..9
|
|
|
|
@return 0 - if wc is a good digit
|
|
@return 1 - if wc is not a digit
|
|
*/
|
|
static inline my_bool
|
|
wc2digit_uchar(uchar *c, my_wc_t wc)
|
|
{
|
|
return wc > '9' || (c[0]= (uchar) (wc - '0')) > 9;
|
|
}
|
|
|
|
|
|
static longlong
|
|
my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *nptr, char **endptr, int *error)
|
|
{
|
|
const uchar *s, *end, *start, *n_end, *true_end;
|
|
uchar UNINIT_VAR(c);
|
|
unsigned long i, j, k;
|
|
ulonglong li;
|
|
int negative;
|
|
ulong cutoff, cutoff2, cutoff3;
|
|
my_wc_t wc;
|
|
int res;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
|
|
s= (const uchar *) nptr;
|
|
/* If fixed length string */
|
|
if (endptr)
|
|
{
|
|
/*
|
|
Make sure string length is even.
|
|
Odd length indicates a bug in the caller.
|
|
Assert in debug, round in production.
|
|
*/
|
|
DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0);
|
|
end= s + ((*endptr - (const char*) s) / 2) * 2;
|
|
|
|
for ( ; ; ) /* Skip leading spaces and tabs */
|
|
{
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
s+= res;
|
|
if (wc != ' ' && wc != '\t')
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* We don't support null terminated strings in UCS2 */
|
|
goto no_conv;
|
|
}
|
|
|
|
/* Check for a sign. */
|
|
negative= 0;
|
|
if (wc == '-')
|
|
{
|
|
*error= -1; /* Mark as negative number */
|
|
negative= 1;
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
s+= res; /* wc is now expected to hold the first digit. */
|
|
cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
|
|
cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
|
|
cutoff3= MAX_NEGATIVE_NUMBER % 100;
|
|
}
|
|
else
|
|
{
|
|
*error= 0;
|
|
if (wc == '+')
|
|
{
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
s+= res; /* wc is now expected to hold the first digit. */
|
|
}
|
|
cutoff= ULONGLONG_MAX / LFACTOR2;
|
|
cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
|
|
cutoff3= ULONGLONG_MAX % 100;
|
|
}
|
|
|
|
/*
|
|
The code below assumes that 'wc' holds the first digit
|
|
and 's' points to the next character after it.
|
|
|
|
Scan pre-zeros if any.
|
|
*/
|
|
if (wc == '0')
|
|
{
|
|
i= 0;
|
|
for ( ; ; s+= res)
|
|
{
|
|
if (s == end)
|
|
goto end_i; /* Return 0 */
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
if (wc != '0')
|
|
break;
|
|
}
|
|
n_end= s + 2 * INIT_CNT;
|
|
}
|
|
else
|
|
{
|
|
/* Read first digit to check that it's a valid number */
|
|
if ((i= (wc - '0')) > 9)
|
|
goto no_conv;
|
|
n_end= s + 2 * (INIT_CNT-1);
|
|
}
|
|
|
|
/* Handle first 9 digits and store them in i */
|
|
if (n_end > end)
|
|
n_end= end;
|
|
for ( ; ; s+= res)
|
|
{
|
|
if ((res= mb_wc(cs, &wc, s, n_end)) <= 0)
|
|
break;
|
|
if (wc2digit_uchar(&c, wc))
|
|
goto end_i;
|
|
i= i*10+c;
|
|
}
|
|
if (s == end)
|
|
goto end_i;
|
|
|
|
/* Handle next 9 digits and store them in j */
|
|
j= 0;
|
|
start= s; /* Used to know how much to shift i */
|
|
n_end= true_end= s + 2 * INIT_CNT;
|
|
if (n_end > end)
|
|
n_end= end;
|
|
do
|
|
{
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
if (wc2digit_uchar(&c, wc))
|
|
goto end_i_and_j;
|
|
s+= res;
|
|
j= j * 10 + c;
|
|
} while (s != n_end);
|
|
if (s == end)
|
|
{
|
|
if (s != true_end)
|
|
goto end_i_and_j;
|
|
goto end3;
|
|
}
|
|
|
|
/* Handle the next 1 or 2 digits and store them in k */
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
if ((k= (wc - '0')) > 9)
|
|
goto end3;
|
|
s+= res;
|
|
|
|
if (s == end)
|
|
goto end4;
|
|
if ((res= mb_wc(cs, &wc, s, end)) <= 0)
|
|
goto no_conv;
|
|
if (wc2digit_uchar(&c, wc))
|
|
goto end4;
|
|
s+= res;
|
|
k= k*10+c;
|
|
*endptr= (char*) s;
|
|
|
|
/* number string should have ended here */
|
|
if (s != end && mb_wc(cs, &wc, s, end) > 0 && ((uchar) (wc - '0')) <= 9)
|
|
goto overflow;
|
|
|
|
/* Check that we didn't get an overflow with the last digit */
|
|
if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
|
|
k > cutoff3)))
|
|
goto overflow;
|
|
li=i*LFACTOR2+ (ulonglong) j*100 + k;
|
|
return (longlong) li;
|
|
|
|
overflow: /* *endptr is set here */
|
|
*error= MY_ERRNO_ERANGE;
|
|
return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
|
|
|
|
end_i:
|
|
*endptr= (char*) s;
|
|
return (negative ? ((longlong) -(long) i) : (longlong) i);
|
|
|
|
end_i_and_j:
|
|
li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
|
|
*endptr= (char*) s;
|
|
return (negative ? -((longlong) li) : (longlong) li);
|
|
|
|
end3:
|
|
li=(ulonglong) i*LFACTOR+ (ulonglong) j;
|
|
*endptr= (char*) s;
|
|
return (negative ? -((longlong) li) : (longlong) li);
|
|
|
|
end4:
|
|
li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
|
|
*endptr= (char*) s;
|
|
if (negative)
|
|
{
|
|
if (li > MAX_NEGATIVE_NUMBER)
|
|
goto overflow;
|
|
return -((longlong) li);
|
|
}
|
|
return (longlong) li;
|
|
|
|
no_conv:
|
|
/* There was no number to convert. */
|
|
*error= MY_ERRNO_EDOM;
|
|
*endptr= (char *) nptr;
|
|
return 0;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *str, const char *end, int sequence_type)
|
|
{
|
|
const char *str0= str;
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
int res;
|
|
|
|
switch (sequence_type)
|
|
{
|
|
case MY_SEQ_SPACES:
|
|
for (res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end);
|
|
res > 0 && wc == ' ';
|
|
str+= res,
|
|
res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end))
|
|
{
|
|
}
|
|
return (size_t) (str - str0);
|
|
case MY_SEQ_NONSPACES:
|
|
DBUG_ASSERT(0); /* Not implemented */
|
|
/* pass through */
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill)
|
|
{
|
|
char buf[10], *last;
|
|
size_t buflen, remainder;
|
|
|
|
DBUG_ASSERT((slen % 2) == 0);
|
|
|
|
buflen= my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
|
|
(uchar*) buf + sizeof(buf));
|
|
|
|
DBUG_ASSERT(buflen > 0);
|
|
|
|
/*
|
|
"last" in the last position where a sequence of "buflen" bytes can start.
|
|
*/
|
|
for (last= s + slen - buflen; s <= last; s+= buflen)
|
|
{
|
|
/* Enough space for the character */
|
|
memcpy(s, buf, buflen);
|
|
}
|
|
|
|
/*
|
|
If there are some more space which is not enough
|
|
for the whole multibyte character, then add trailing zeros.
|
|
*/
|
|
if ((remainder= last + buflen - s) > 0)
|
|
bzero(s, (size_t) remainder);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
|
|
{
|
|
char *start=dst, *end= dst + n - 1;
|
|
for (; *fmt ; fmt++)
|
|
{
|
|
if (fmt[0] != '%')
|
|
{
|
|
if (dst == end) /* End of buffer */
|
|
break;
|
|
|
|
*dst++='\0';
|
|
*dst++= *fmt; /* Copy ordinary char */
|
|
continue;
|
|
}
|
|
|
|
fmt++;
|
|
|
|
/* Skip if max size is used (to be compatible with printf) */
|
|
while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
|
|
fmt++;
|
|
|
|
if (*fmt == 'l')
|
|
fmt++;
|
|
|
|
if (*fmt == 's') /* String parameter */
|
|
{
|
|
char *par= va_arg(ap, char *);
|
|
size_t plen;
|
|
size_t left_len= (size_t)(end-dst);
|
|
if (!par)
|
|
par= (char*) "(null)";
|
|
plen= strlen(par);
|
|
if (left_len <= plen * 2)
|
|
plen = left_len / 2 - 1;
|
|
|
|
for ( ; plen ; plen--, dst+=2, par++)
|
|
{
|
|
dst[0]= '\0';
|
|
dst[1]= par[0];
|
|
}
|
|
continue;
|
|
}
|
|
else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
|
|
{
|
|
int iarg;
|
|
char nbuf[16];
|
|
char *pbuf= nbuf;
|
|
|
|
if ((size_t) (end - dst) < 32)
|
|
break;
|
|
iarg= va_arg(ap, int);
|
|
if (*fmt == 'd')
|
|
int10_to_str((long) iarg, nbuf, -10);
|
|
else
|
|
int10_to_str((long) (uint) iarg, nbuf,10);
|
|
|
|
for (; pbuf[0]; pbuf++)
|
|
{
|
|
*dst++= '\0';
|
|
*dst++= *pbuf;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* We come here on '%%', unknown code or too long parameter */
|
|
if (dst == end)
|
|
break;
|
|
*dst++= '\0';
|
|
*dst++= '%'; /* % used as % or unknown code */
|
|
}
|
|
|
|
DBUG_ASSERT(dst <= end);
|
|
*dst='\0'; /* End of errmessage */
|
|
return (size_t) (dst - start);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
|
char* to, size_t n, const char* fmt, ...)
|
|
{
|
|
size_t ret;
|
|
va_list args;
|
|
va_start(args,fmt);
|
|
ret= my_vsnprintf_mb2(to, n, fmt, args);
|
|
va_end(args);
|
|
return ret;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *ptr, size_t length)
|
|
{
|
|
const char *end= ptr + length;
|
|
while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
|
|
end-= 2;
|
|
return (size_t) (end - ptr);
|
|
}
|
|
|
|
#endif /* HAVE_CHARSET_mb2*/
|
|
|
|
|
|
/*
|
|
Next part is actually HAVE_CHARSET_utf16-specific,
|
|
but the JSON functions needed my_utf16_uni()
|
|
so the #ifdef was moved lower.
|
|
*/
|
|
#include "ctype-utf16.h"
|
|
|
|
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0))
|
|
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
|
|
|
|
/*
  Weight of a two-byte UTF16 character for the general_ci collations:
  the "sort" value from my_unicase_default_pages if the character's
  page exists, otherwise the code point itself.
*/
static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
{
  my_wc_t wc= MY_UTF16_WC2(b0, b1);
  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];

  if (page)
    return (int) page[wc & 0xFF].sort;
  return (int) wc;
}
|
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
|
|
#define DEFINE_STRNXFRM_UNICODE
|
|
#define DEFINE_STRNXFRM_UNICODE_NOPAD
|
|
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf16_quick(pwc, s, e)
|
|
#define OPTIMIZE_ASCII 0
|
|
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
|
|
#define UNICASE_PAGE0 my_unicase_default_page00
|
|
#define UNICASE_PAGES my_unicase_default_pages
|
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
|
#include "strcoll.ic"
|
|
|
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin
|
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
|
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1))
|
|
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
|
|
#include "strcoll.ic"
|
|
|
|
#define DEFINE_STRNNCOLLSP_NOPAD
|
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_nopad_ci
|
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
|
#include "strcoll.ic"
|
|
|
|
#define DEFINE_STRNNCOLLSP_NOPAD
|
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_nopad_bin
|
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
|
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1))
|
|
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
|
|
#include "strcoll.ic"
|
|
|
|
#undef IS_MB2_CHAR
|
|
#undef IS_MB4_CHAR
|
|
|
|
/*
|
|
These two functions are used in JSON library, so made exportable
|
|
and unconditionally compiled into the library.
|
|
*/
|
|
|
|
/*static*/ int
|
|
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t *pwc, const uchar *s, const uchar *e)
|
|
{
|
|
return my_mb_wc_utf16_quick(pwc, s, e);
|
|
}
|
|
|
|
|
|
/*static*/ int
|
|
my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t wc, uchar *s, uchar *e)
|
|
{
|
|
if (wc <= 0xFFFF)
|
|
{
|
|
if (s + 2 > e)
|
|
return MY_CS_TOOSMALL2;
|
|
if (MY_UTF16_SURROGATE(wc))
|
|
return MY_CS_ILUNI;
|
|
*s++= (uchar) (wc >> 8);
|
|
*s= (uchar) (wc & 0xFF);
|
|
return 2;
|
|
}
|
|
|
|
if (wc <= 0x10FFFF)
|
|
{
|
|
if (s + 4 > e)
|
|
return MY_CS_TOOSMALL4;
|
|
*s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
|
|
*s++= (uchar) (wc >> 10) & 0xFF;
|
|
*s++= (uchar) ((wc >> 8) & 3) | 0xDC;
|
|
*s= (uchar) wc & 0xFF;
|
|
return 4;
|
|
}
|
|
|
|
return MY_CS_ILUNI;
|
|
}
|
|
|
|
|
|
#ifdef HAVE_CHARSET_utf16
|
|
|
|
const char charset_name_utf16le[]= "utf16le";
|
|
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
|
|
|
|
static inline void
|
|
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].tolower;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].toupper;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
if (*wc <= uni_plane->maxchar)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].sort;
|
|
}
|
|
else
|
|
{
|
|
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static size_t
|
|
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
|
|
char *dst, size_t dstlen)
|
|
{
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
|
|
int res;
|
|
const char *srcend= src + srclen;
|
|
char *dstend= dst + dstlen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
DBUG_ASSERT(srclen <= dstlen);
|
|
|
|
while ((src < srcend) &&
|
|
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
|
|
{
|
|
my_toupper_utf16(uni_plane, &wc);
|
|
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
|
|
break;
|
|
src+= res;
|
|
dst+= res;
|
|
}
|
|
return srclen;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
|
|
const uchar *s, size_t slen,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
int res;
|
|
const uchar *e= s + slen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
register ulong m1= *nr1, m2= *nr2;
|
|
|
|
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
|
|
{
|
|
my_tosort_utf16(uni_plane, &wc);
|
|
MY_HASH_ADD_16(m1, m2, wc);
|
|
s+= res;
|
|
}
|
|
*nr1= m1;
|
|
*nr2= m2;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
size_t lengthsp= my_ci_lengthsp(cs, (const char *) s, slen);
|
|
my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
|
|
char *dst, size_t dstlen)
|
|
{
|
|
my_wc_t wc;
|
|
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
|
|
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
|
|
int res;
|
|
const char *srcend= src + srclen;
|
|
char *dstend= dst + dstlen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
DBUG_ASSERT(srclen <= dstlen);
|
|
|
|
while ((src < srcend) &&
|
|
(res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
|
|
{
|
|
my_tolower_utf16(uni_plane, &wc);
|
|
if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
|
|
break;
|
|
src+= res;
|
|
dst+= res;
|
|
}
|
|
return srclen;
|
|
}
|
|
|
|
|
|
static int
|
|
my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
|
|
{
|
|
my_wc_t wc;
|
|
return my_ci_mb_wc(cs, &wc, str, end);
|
|
}
|
|
|
|
|
|
/*
  Include ctype-mb.ic with CHARLEN bound to my_charlen_utf16().
  Defines my_well_formed_char_length_utf16.
*/
#define MY_FUNCTION_NAME(x)   my_ ## x ## _utf16
#define CHARLEN(cs,str,end)   my_charlen_utf16(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
|
|
|
|
|
static size_t
|
|
my_numchars_utf16(CHARSET_INFO *cs,
|
|
const char *b, const char *e)
|
|
{
|
|
size_t nchars= 0;
|
|
for ( ; ; nchars++)
|
|
{
|
|
size_t charlen= my_ismbchar(cs, b, e);
|
|
if (!charlen)
|
|
break;
|
|
b+= charlen;
|
|
}
|
|
return nchars;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_charpos_utf16(CHARSET_INFO *cs,
|
|
const char *b, const char *e, size_t pos)
|
|
{
|
|
const char *b0= b;
|
|
uint charlen;
|
|
|
|
for ( ; pos; b+= charlen, pos--)
|
|
{
|
|
if (!(charlen= my_ismbchar(cs, b, e)))
|
|
return (e + 2 - b0); /* Error, return pos outside the string */
|
|
}
|
|
return (size_t) (pos ? (e + 2 - b0) : (b - b0));
|
|
}
|
|
|
|
|
|
static int
|
|
my_wildcmp_utf16_ci(CHARSET_INFO *cs,
|
|
const char *str,const char *str_end,
|
|
const char *wildstr,const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
|
|
escape, w_one, w_many, uni_plane);
|
|
}
|
|
|
|
|
|
static int
|
|
my_wildcmp_utf16_bin(CHARSET_INFO *cs,
|
|
const char *str,const char *str_end,
|
|
const char *wildstr,const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
|
|
escape, w_one, w_many, NULL);
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|
const uchar *pos, size_t len,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
const uchar *end= pos + len;
|
|
register ulong m1= *nr1, m2= *nr2;
|
|
|
|
for ( ; pos < end ; pos++)
|
|
{
|
|
MY_HASH_ADD(m1, m2, (uint)*pos);
|
|
}
|
|
*nr1= m1;
|
|
*nr2= m2;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf16_bin(CHARSET_INFO *cs,
|
|
const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
|
|
{
|
|
size_t lengthsp= my_ci_lengthsp(cs, (const char *) pos, len);
|
|
my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2);
|
|
}
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16_general_ci,
|
|
my_strnncollsp_utf16_general_ci,
|
|
my_strnxfrm_utf16_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16_bin,
|
|
my_strnncollsp_utf16_bin,
|
|
my_strnxfrm_unicode_full_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16_general_ci,
|
|
my_strnncollsp_utf16_general_nopad_ci,
|
|
my_strnxfrm_nopad_utf16_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_nopad,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16_bin,
|
|
my_strnncollsp_utf16_nopad_bin,
|
|
my_strnxfrm_unicode_full_nopad_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_nopad_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
MY_CHARSET_HANDLER my_charset_utf16_handler=
|
|
{
|
|
NULL, /* init */
|
|
my_numchars_utf16,
|
|
my_charpos_utf16,
|
|
my_lengthsp_mb2,
|
|
my_numcells_mb,
|
|
my_utf16_uni, /* mb_wc */
|
|
my_uni_utf16, /* wc_mb */
|
|
my_mb_ctype_mb,
|
|
my_caseup_str_mb2_or_mb4,
|
|
my_casedn_str_mb2_or_mb4,
|
|
my_caseup_utf16,
|
|
my_casedn_utf16,
|
|
my_snprintf_mb2,
|
|
my_l10tostr_mb2_or_mb4,
|
|
my_ll10tostr_mb2_or_mb4,
|
|
my_fill_mb2,
|
|
my_strntol_mb2_or_mb4,
|
|
my_strntoul_mb2_or_mb4,
|
|
my_strntoll_mb2_or_mb4,
|
|
my_strntoull_mb2_or_mb4,
|
|
my_strntod_mb2_or_mb4,
|
|
my_strtoll10_mb2,
|
|
my_strntoull10rnd_mb2_or_mb4,
|
|
my_scan_mb2,
|
|
my_charlen_utf16,
|
|
my_well_formed_char_length_utf16,
|
|
my_copy_fix_mb2_or_mb4,
|
|
my_uni_utf16,
|
|
my_wc_to_printable_generic
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16_general_ci=
|
|
{
|
|
54,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf16_general_ci") }, /* name */
|
|
"UTF-16 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16_handler,
|
|
&my_collation_utf16_general_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16_bin=
|
|
{
|
|
55,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf16_bin") }, /* name */
|
|
"UTF-16 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16_handler,
|
|
&my_collation_utf16_bin_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16_general_nopad_ci=
|
|
{
|
|
MY_NOPAD_ID(54),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
|
|
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf16_general_nopad_ci") }, /* name */
|
|
"UTF-16 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16_handler,
|
|
&my_collation_utf16_general_nopad_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16_nopad_bin=
|
|
{
|
|
MY_NOPAD_ID(55),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
|
|
MY_CS_NOPAD,
|
|
{ charset_name_utf16, charset_name_utf16_length}, /* cs name */
|
|
{ STRING_WITH_LEN("utf16_nopad_bin") }, /* name */
|
|
"UTF-16 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16_handler,
|
|
&my_collation_utf16_nopad_bin_handler
|
|
};
|
|
|
|
|
|
/*
  Character classifiers for utf16le. They look at b1 and b3, i.e. the
  second byte of each 16-bit unit.
*/
#define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b1))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))

/*
  utf16le_general_ci: two-byte weights come from
  my_weight_mb2_utf16mb2_general_ci() with the bytes passed swapped
  (b1,b0); every four-byte character weighs MY_CS_REPLACEMENT_CHARACTER.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  (my_ci_mb_wc(cs, pwc, s, e))
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"

/* utf16le_bin: weights from MY_UTF16_WC2/WC4 with the bytes of each unit swapped */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.ic"

/* utf16le_general_nopad_ci: same weights as utf16le_general_ci, NOPAD flavour */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"

/* utf16le_nopad_bin: same weights as utf16le_bin, NOPAD flavour */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.ic"

#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
|
|
|
|
static int
|
|
my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t *pwc, const uchar *s, const uchar *e)
|
|
{
|
|
my_wc_t lo;
|
|
|
|
if (s + 2 > e)
|
|
return MY_CS_TOOSMALL2;
|
|
|
|
if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
|
|
(*pwc > MY_UTF16_SURROGATE_LOW_LAST))
|
|
return 2; /* [0000-D7FF,E000-FFFF] */
|
|
|
|
if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
|
|
return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
|
|
|
|
if (s + 4 > e)
|
|
return MY_CS_TOOSMALL4;
|
|
|
|
s+= 2;
|
|
|
|
if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
|
|
lo > MY_UTF16_SURROGATE_LOW_LAST)
|
|
return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */
|
|
|
|
*pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
|
|
return 4;
|
|
}
|
|
|
|
|
|
static int
|
|
my_uni_utf16le(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t wc, uchar *s, uchar *e)
|
|
{
|
|
uint32 first, second, total;
|
|
if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
|
|
(wc > MY_UTF16_SURROGATE_LOW_LAST &&
|
|
wc <= 0xFFFF))
|
|
{
|
|
if (s + 2 > e)
|
|
return MY_CS_TOOSMALL2;
|
|
int2store(s, wc);
|
|
return 2; /* [0000-D7FF,E000-FFFF] */
|
|
}
|
|
|
|
if (wc < 0xFFFF || wc > 0x10FFFF)
|
|
return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
|
|
|
|
if (s + 4 > e)
|
|
return MY_CS_TOOSMALL4;
|
|
|
|
wc-= 0x10000;
|
|
first= (0xD800 | ((wc >> 10) & 0x3FF));
|
|
second= (0xDC00 | (wc & 0x3FF));
|
|
total= first | (second << 16);
|
|
int4store(s, total);
|
|
return 4; /* [010000-10FFFF] */
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *ptr, size_t length)
|
|
{
|
|
const char *end= ptr + length;
|
|
while (end > ptr + 1 && uint2korr(end - 2) == ' ')
|
|
end-= 2;
|
|
return (size_t) (end - ptr);
|
|
}
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16le_general_ci,
|
|
my_strnncollsp_utf16le_general_ci,
|
|
my_strnxfrm_utf16le_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16le_bin,
|
|
my_strnncollsp_utf16le_bin,
|
|
my_strnxfrm_unicode_full_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16le_general_ci,
|
|
my_strnncollsp_utf16le_general_nopad_ci,
|
|
my_strnxfrm_nopad_utf16le_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_nopad,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf16le_bin,
|
|
my_strnncollsp_utf16le_nopad_bin,
|
|
my_strnxfrm_unicode_full_nopad_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf16_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf16_nopad_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
|
|
{
|
|
NULL, /* init */
|
|
my_numchars_utf16,
|
|
my_charpos_utf16,
|
|
my_lengthsp_utf16le,
|
|
my_numcells_mb,
|
|
my_utf16le_uni, /* mb_wc */
|
|
my_uni_utf16le, /* wc_mb */
|
|
my_mb_ctype_mb,
|
|
my_caseup_str_mb2_or_mb4,
|
|
my_casedn_str_mb2_or_mb4,
|
|
my_caseup_utf16,
|
|
my_casedn_utf16,
|
|
my_snprintf_mb2,
|
|
my_l10tostr_mb2_or_mb4,
|
|
my_ll10tostr_mb2_or_mb4,
|
|
my_fill_mb2,
|
|
my_strntol_mb2_or_mb4,
|
|
my_strntoul_mb2_or_mb4,
|
|
my_strntoll_mb2_or_mb4,
|
|
my_strntoull_mb2_or_mb4,
|
|
my_strntod_mb2_or_mb4,
|
|
my_strtoll10_mb2,
|
|
my_strntoull10rnd_mb2_or_mb4,
|
|
my_scan_mb2,
|
|
my_charlen_utf16,
|
|
my_well_formed_char_length_utf16,
|
|
my_copy_fix_mb2_or_mb4,
|
|
my_uni_utf16le,
|
|
my_wc_to_printable_generic
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16le_general_ci=
|
|
{
|
|
56,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf16le, charset_name_utf16le_length },
|
|
{ STRING_WITH_LEN("utf16le_general_ci") },/* name */
|
|
"UTF-16LE Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16le_handler,
|
|
&my_collation_utf16le_general_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16le_bin=
|
|
{
|
|
62,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf16le, charset_name_utf16le_length },
|
|
{ STRING_WITH_LEN("utf16le_bin") }, /* name */
|
|
"UTF-16LE Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16le_handler,
|
|
&my_collation_utf16le_bin_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16le_general_nopad_ci=
|
|
{
|
|
MY_NOPAD_ID(56),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
|
|
{ charset_name_utf16le, charset_name_utf16le_length },
|
|
{ STRING_WITH_LEN("utf16le_general_nopad_ci") }, /* name */
|
|
"UTF-16LE Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16le_handler,
|
|
&my_collation_utf16le_general_nopad_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf16le_nopad_bin=
|
|
{
|
|
MY_NOPAD_ID(62),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
|
|
MY_CS_NOPAD,
|
|
{ charset_name_utf16le, charset_name_utf16le_length },
|
|
{ STRING_WITH_LEN("utf16le_nopad_bin") }, /* name */
|
|
"UTF-16LE Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf16le_handler,
|
|
&my_collation_utf16le_nopad_bin_handler
|
|
};
|
|
|
|
|
|
#endif /* HAVE_CHARSET_utf16 */
|
|
|
|
|
|
#ifdef HAVE_CHARSET_utf32
|
|
|
|
#include "ctype-utf32.h"
|
|
|
|
/*
  Check if b0 and b1 start a valid UTF32 four-byte sequence.
  Don't accept characters greater than U+10FFFF.
*/
|
|
/* True when the first byte is zero and the second byte is at most 0x10 */
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))

/* A four-byte character is classified by its first two bytes only */
#define IS_MB4_CHAR(b0,b1,b2,b3)   (IS_UTF32_MBHEAD4(b0,b1))
|
|
|
|
|
|
/*
  Return the general_ci weight of the utf32 character b0 b1 b2 b3.

  Code points up to 0xFFFF use the "sort" member of their
  my_unicase_default_pages entry, or the code point itself when the
  page is missing. Everything above 0xFFFF weighs
  MY_CS_REPLACEMENT_CHARACTER.
*/
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
                                             uchar b2, uchar b3)
{
  my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
  MY_UNICASE_CHARACTER *page;

  if (wc > 0xFFFF)
    return MY_CS_REPLACEMENT_CHARACTER;

  page= my_unicase_default_pages[wc >> 8];
  return (int) (page ? page[wc & 0xFF].sort : wc);
}
|
|
/* utf32_general_ci: weights come from my_weight_utf32_general_ci() */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf32_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.ic"

/* utf32_bin: weights come straight from MY_UTF32_WC4 */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.ic"

/* utf32_general_nopad_ci: same weights as utf32_general_ci, NOPAD flavour */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.ic"

/* utf32_nopad_bin: same weights as utf32_bin, NOPAD flavour */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.ic"

#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
|
|
|
|
|
|
static int
|
|
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t *pwc, const uchar *s, const uchar *e)
|
|
{
|
|
return my_mb_wc_utf32_quick(pwc, s, e);
|
|
}
|
|
|
|
|
|
static int
|
|
my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t wc, uchar *s, uchar *e)
|
|
{
|
|
if (s + 4 > e)
|
|
return MY_CS_TOOSMALL4;
|
|
|
|
if (wc > 0x10FFFF)
|
|
return MY_CS_ILUNI;
|
|
|
|
s[0]= (uchar) (wc >> 24);
|
|
s[1]= (uchar) (wc >> 16) & 0xFF;
|
|
s[2]= (uchar) (wc >> 8) & 0xFF;
|
|
s[3]= (uchar) wc & 0xFF;
|
|
return 4;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].tolower;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].toupper;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
if (*wc <= uni_plane->maxchar)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((page= uni_plane->page[*wc >> 8]))
|
|
*wc= page[*wc & 0xFF].sort;
|
|
}
|
|
else
|
|
{
|
|
*wc= MY_CS_REPLACEMENT_CHARACTER;
|
|
}
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *ptr, size_t length)
|
|
{
|
|
const char *end= ptr + length;
|
|
DBUG_ASSERT((length % 4) == 0);
|
|
while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
|
|
end-= 4;
|
|
return (size_t) (end - ptr);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
|
|
char *dst, size_t dstlen)
|
|
{
|
|
my_wc_t wc;
|
|
int res;
|
|
const char *srcend= src + srclen;
|
|
char *dstend= dst + dstlen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
DBUG_ASSERT(srclen <= dstlen);
|
|
|
|
while ((src < srcend) &&
|
|
(res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
|
|
{
|
|
my_toupper_utf32(uni_plane, &wc);
|
|
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
|
|
break;
|
|
src+= res;
|
|
dst+= res;
|
|
}
|
|
return srclen;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
my_wc_t wc;
|
|
int res;
|
|
const uchar *e= s + slen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
register ulong m1= *nr1, m2= *nr2;
|
|
|
|
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
|
|
{
|
|
my_tosort_utf32(uni_plane, &wc);
|
|
MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
|
|
MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
|
|
MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF);
|
|
MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF));
|
|
s+= res;
|
|
}
|
|
*nr1= m1;
|
|
*nr2= m2;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen);
|
|
my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
|
|
char *dst, size_t dstlen)
|
|
{
|
|
my_wc_t wc;
|
|
int res;
|
|
const char *srcend= src + srclen;
|
|
char *dstend= dst + dstlen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
DBUG_ASSERT(srclen <= dstlen);
|
|
|
|
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
|
|
{
|
|
my_tolower_utf32(uni_plane,&wc);
|
|
if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
|
|
break;
|
|
src+= res;
|
|
dst+= res;
|
|
}
|
|
return srclen;
|
|
}
|
|
|
|
|
|
static int
|
|
my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
const uchar *b, const uchar *e)
|
|
{
|
|
return b + 4 > e ? MY_CS_TOOSMALL4 :
|
|
IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ;
|
|
}
|
|
|
|
|
|
/*
  Include ctype-mb.ic with CHARLEN bound to my_charlen_utf32().
  Defines my_well_formed_char_length_utf32.
*/
#define MY_FUNCTION_NAME(x)   my_ ## x ## _utf32
#define CHARLEN(cs,str,end)   my_charlen_utf32(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
|
|
|
|
|
static size_t
|
|
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
|
|
{
|
|
char *start= dst, *end= dst + n;
|
|
DBUG_ASSERT((n % 4) == 0);
|
|
for (; *fmt ; fmt++)
|
|
{
|
|
if (fmt[0] != '%')
|
|
{
|
|
if (dst >= end) /* End of buffer */
|
|
break;
|
|
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= *fmt; /* Copy ordinary char */
|
|
continue;
|
|
}
|
|
|
|
fmt++;
|
|
|
|
/* Skip if max size is used (to be compatible with printf) */
|
|
while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
|
|
fmt++;
|
|
|
|
if (*fmt == 'l')
|
|
fmt++;
|
|
|
|
if (*fmt == 's') /* String parameter */
|
|
{
|
|
reg2 char *par= va_arg(ap, char *);
|
|
size_t plen;
|
|
size_t left_len= (size_t)(end - dst);
|
|
if (!par) par= (char*)"(null)";
|
|
plen= strlen(par);
|
|
if (left_len <= plen*4)
|
|
plen= left_len / 4 - 1;
|
|
|
|
for ( ; plen ; plen--, dst+= 4, par++)
|
|
{
|
|
dst[0]= '\0';
|
|
dst[1]= '\0';
|
|
dst[2]= '\0';
|
|
dst[3]= par[0];
|
|
}
|
|
continue;
|
|
}
|
|
else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
|
|
{
|
|
register int iarg;
|
|
char nbuf[16];
|
|
char *pbuf= nbuf;
|
|
|
|
if ((size_t) (end - dst) < 64)
|
|
break;
|
|
iarg= va_arg(ap, int);
|
|
if (*fmt == 'd')
|
|
int10_to_str((long) iarg, nbuf, -10);
|
|
else
|
|
int10_to_str((long) (uint) iarg,nbuf,10);
|
|
|
|
for (; pbuf[0]; pbuf++)
|
|
{
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= *pbuf;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/* We come here on '%%', unknown code or too long parameter */
|
|
if (dst == end)
|
|
break;
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '%'; /* % used as % or unknown code */
|
|
}
|
|
|
|
DBUG_ASSERT(dst < end);
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '\0';
|
|
*dst++= '\0'; /* End of errmessage */
|
|
return (size_t) (dst - start - 4);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
char* to, size_t n, const char* fmt, ...)
|
|
{
|
|
size_t ret;
|
|
va_list args;
|
|
va_start(args,fmt);
|
|
ret= my_vsnprintf_utf32(to, n, fmt, args);
|
|
va_end(args);
|
|
return ret;
|
|
}
|
|
|
|
|
|
static longlong
|
|
my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *nptr, char **endptr, int *error)
|
|
{
|
|
const char *s, *end, *start, *n_end, *true_end;
|
|
uchar c;
|
|
unsigned long i, j, k;
|
|
ulonglong li;
|
|
int negative;
|
|
ulong cutoff, cutoff2, cutoff3;
|
|
|
|
s= nptr;
|
|
/* If fixed length string */
|
|
if (endptr)
|
|
{
|
|
/* Make sure string length is even */
|
|
end= s + ((*endptr - s) / 4) * 4;
|
|
while (s < end && !s[0] && !s[1] && !s[2] &&
|
|
(s[3] == ' ' || s[3] == '\t'))
|
|
s+= 4;
|
|
if (s == end)
|
|
goto no_conv;
|
|
}
|
|
else
|
|
{
|
|
/* We don't support null terminated strings in UCS2 */
|
|
goto no_conv;
|
|
}
|
|
|
|
/* Check for a sign. */
|
|
negative= 0;
|
|
if (!s[0] && !s[1] && !s[2] && s[3] == '-')
|
|
{
|
|
*error= -1; /* Mark as negative number */
|
|
negative= 1;
|
|
s+= 4;
|
|
if (s == end)
|
|
goto no_conv;
|
|
cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
|
|
cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
|
|
cutoff3= MAX_NEGATIVE_NUMBER % 100;
|
|
}
|
|
else
|
|
{
|
|
*error= 0;
|
|
if (!s[0] && !s[1] && !s[2] && s[3] == '+')
|
|
{
|
|
s+= 4;
|
|
if (s == end)
|
|
goto no_conv;
|
|
}
|
|
cutoff= ULONGLONG_MAX / LFACTOR2;
|
|
cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
|
|
cutoff3= ULONGLONG_MAX % 100;
|
|
}
|
|
|
|
/* Handle case where we have a lot of pre-zero */
|
|
if (!s[0] && !s[1] && !s[2] && s[3] == '0')
|
|
{
|
|
i= 0;
|
|
do
|
|
{
|
|
s+= 4;
|
|
if (s == end)
|
|
goto end_i; /* Return 0 */
|
|
}
|
|
while (!s[0] && !s[1] && !s[2] && s[3] == '0');
|
|
n_end= s + 4 * INIT_CNT;
|
|
}
|
|
else
|
|
{
|
|
/* Read first digit to check that it's a valid number */
|
|
if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
|
|
goto no_conv;
|
|
i= c;
|
|
s+= 4;
|
|
n_end= s + 4 * (INIT_CNT-1);
|
|
}
|
|
|
|
/* Handle first 9 digits and store them in i */
|
|
if (n_end > end)
|
|
n_end= end;
|
|
for (; s != n_end ; s+= 4)
|
|
{
|
|
if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
|
|
goto end_i;
|
|
i= i * 10 + c;
|
|
}
|
|
if (s == end)
|
|
goto end_i;
|
|
|
|
/* Handle next 9 digits and store them in j */
|
|
j= 0;
|
|
start= s; /* Used to know how much to shift i */
|
|
n_end= true_end= s + 4 * INIT_CNT;
|
|
if (n_end > end)
|
|
n_end= end;
|
|
do
|
|
{
|
|
if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
|
|
goto end_i_and_j;
|
|
j= j * 10 + c;
|
|
s+= 4;
|
|
} while (s != n_end);
|
|
if (s == end)
|
|
{
|
|
if (s != true_end)
|
|
goto end_i_and_j;
|
|
goto end3;
|
|
}
|
|
if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
|
|
goto end3;
|
|
|
|
/* Handle the next 1 or 2 digits and store them in k */
|
|
k=c;
|
|
s+= 4;
|
|
if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
|
|
goto end4;
|
|
k= k * 10 + c;
|
|
s+= 4;
|
|
*endptr= (char*) s;
|
|
|
|
/* number string should have ended here */
|
|
if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
|
|
goto overflow;
|
|
|
|
/* Check that we didn't get an overflow with the last digit */
|
|
if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
|
|
k > cutoff3)))
|
|
goto overflow;
|
|
li= i * LFACTOR2+ (ulonglong) j * 100 + k;
|
|
return (longlong) li;
|
|
|
|
overflow: /* *endptr is set here */
|
|
*error= MY_ERRNO_ERANGE;
|
|
return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
|
|
|
|
end_i:
|
|
*endptr= (char*) s;
|
|
return (negative ? ((longlong) -(long) i) : (longlong) i);
|
|
|
|
end_i_and_j:
|
|
li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
|
|
*endptr= (char*) s;
|
|
return (negative ? -((longlong) li) : (longlong) li);
|
|
|
|
end3:
|
|
li= (ulonglong) i*LFACTOR+ (ulonglong) j;
|
|
*endptr= (char*) s;
|
|
return (negative ? -((longlong) li) : (longlong) li);
|
|
|
|
end4:
|
|
li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
|
|
*endptr= (char*) s;
|
|
if (negative)
|
|
{
|
|
if (li > MAX_NEGATIVE_NUMBER)
|
|
goto overflow;
|
|
return -((longlong) li);
|
|
}
|
|
return (longlong) li;
|
|
|
|
no_conv:
|
|
/* There was no number to convert. */
|
|
*error= MY_ERRNO_EDOM;
|
|
*endptr= (char *) nptr;
|
|
return 0;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *b, const char *e)
|
|
{
|
|
return (size_t) (e - b) / 4;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *b, const char *e, size_t pos)
|
|
{
|
|
size_t string_length= (size_t) (e - b);
|
|
return pos * 4 > string_length ? string_length + 4 : pos * 4;
|
|
}
|
|
|
|
|
|
static
|
|
void my_fill_utf32(CHARSET_INFO *cs,
|
|
char *s, size_t slen, int fill)
|
|
{
|
|
char buf[10];
|
|
#ifdef DBUG_ASSERT_EXISTS
|
|
uint buflen;
|
|
#endif
|
|
char *e= s + slen;
|
|
|
|
DBUG_ASSERT((slen % 4) == 0);
|
|
|
|
#ifdef DBUG_ASSERT_EXISTS
|
|
buflen=
|
|
#endif
|
|
my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf, (uchar*) buf + sizeof(buf));
|
|
DBUG_ASSERT(buflen == 4);
|
|
while (s < e)
|
|
{
|
|
memcpy(s, buf, 4);
|
|
s+= 4;
|
|
}
|
|
}
|
|
|
|
|
|
static int
|
|
my_wildcmp_utf32_ci(CHARSET_INFO *cs,
|
|
const char *str, const char *str_end,
|
|
const char *wildstr, const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
|
|
escape, w_one, w_many, uni_plane);
|
|
}
|
|
|
|
|
|
static int
|
|
my_wildcmp_utf32_bin(CHARSET_INFO *cs,
|
|
const char *str,const char *str_end,
|
|
const char *wildstr,const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
|
|
escape, w_one, w_many, NULL);
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_scan_utf32(CHARSET_INFO *cs,
|
|
const char *str, const char *end, int sequence_type)
|
|
{
|
|
const char *str0= str;
|
|
|
|
switch (sequence_type)
|
|
{
|
|
case MY_SEQ_SPACES:
|
|
for ( ; str < end; )
|
|
{
|
|
my_wc_t wc;
|
|
int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
|
|
if (res < 0 || wc != ' ')
|
|
break;
|
|
str+= res;
|
|
}
|
|
return (size_t) (str - str0);
|
|
case MY_SEQ_NONSPACES:
|
|
DBUG_ASSERT(0); /* Not implemented */
|
|
/* pass through */
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf32_general_ci,
|
|
my_strnncollsp_utf32_general_ci,
|
|
my_strnxfrm_utf32_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf32_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf32,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf32_bin,
|
|
my_strnncollsp_utf32_bin,
|
|
my_strnxfrm_unicode_full_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf32_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf32,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf32_general_ci,
|
|
my_strnncollsp_utf32_general_nopad_ci,
|
|
my_strnxfrm_nopad_utf32_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf32_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf32_nopad,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_utf32_bin,
|
|
my_strnncollsp_utf32_nopad_bin,
|
|
my_strnxfrm_unicode_full_nopad_bin,
|
|
my_strnxfrmlen_unicode_full_bin,
|
|
my_like_range_generic,
|
|
my_wildcmp_utf32_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_utf32_nopad,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
MY_CHARSET_HANDLER my_charset_utf32_handler=
|
|
{
|
|
NULL, /* init */
|
|
my_numchars_utf32,
|
|
my_charpos_utf32,
|
|
my_lengthsp_utf32,
|
|
my_numcells_mb,
|
|
my_utf32_uni,
|
|
my_uni_utf32,
|
|
my_mb_ctype_mb,
|
|
my_caseup_str_mb2_or_mb4,
|
|
my_casedn_str_mb2_or_mb4,
|
|
my_caseup_utf32,
|
|
my_casedn_utf32,
|
|
my_snprintf_utf32,
|
|
my_l10tostr_mb2_or_mb4,
|
|
my_ll10tostr_mb2_or_mb4,
|
|
my_fill_utf32,
|
|
my_strntol_mb2_or_mb4,
|
|
my_strntoul_mb2_or_mb4,
|
|
my_strntoll_mb2_or_mb4,
|
|
my_strntoull_mb2_or_mb4,
|
|
my_strntod_mb2_or_mb4,
|
|
my_strtoll10_utf32,
|
|
my_strntoull10rnd_mb2_or_mb4,
|
|
my_scan_utf32,
|
|
my_charlen_utf32,
|
|
my_well_formed_char_length_utf32,
|
|
my_copy_fix_mb2_or_mb4,
|
|
my_uni_utf32,
|
|
my_wc_to_printable_generic
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf32_general_ci=
|
|
{
|
|
60,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf32_general_ci") }, /* name */
|
|
"UTF-32 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
4, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf32_handler,
|
|
&my_collation_utf32_general_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf32_bin=
|
|
{
|
|
61,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf32_bin") }, /* name */
|
|
"UTF-32 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
4, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf32_handler,
|
|
&my_collation_utf32_bin_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf32_general_nopad_ci=
|
|
{
|
|
MY_NOPAD_ID(60),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
|
|
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf32_general_nopad_ci") }, /* name */
|
|
"UTF-32 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
4, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf32_handler,
|
|
&my_collation_utf32_general_nopad_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_utf32_nopad_bin=
|
|
{
|
|
MY_NOPAD_ID(61),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
|
|
MY_CS_NOPAD,
|
|
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
|
|
{ STRING_WITH_LEN("utf32_nopad_bin") }, /* name */
|
|
"UTF-32 Unicode", /* comment */
|
|
NULL, /* tailoring */
|
|
NULL, /* ctype */
|
|
NULL, /* to_lower */
|
|
NULL, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
4, /* mbminlen */
|
|
4, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_utf32_handler,
|
|
&my_collation_utf32_nopad_bin_handler
|
|
};
|
|
|
|
|
|
#endif /* HAVE_CHARSET_utf32 */
|
|
|
|
|
|
#ifdef HAVE_CHARSET_ucs2
|
|
|
|
#include "ctype-ucs2.h"
|
|
|
|
/*
  Character class table referenced as "ctype" by the ucs2
  charset_info_st definitions: one leading 0 entry followed by 256
  entries, 16 per row. The row comments give the index range of the
  row, not counting the leading entry.
*/
static const uchar ctype_ucs2[] = {
    0,                                                               /* leading entry */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,  /* 0x00 - 0x0F */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,  /* 0x10 - 0x1F */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,  /* 0x20 - 0x2F */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,  /* 0x30 - 0x3F */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,  /* 0x40 - 0x4F */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,  /* 0x50 - 0x5F */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,  /* 0x60 - 0x6F */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,  /* 0x70 - 0x7F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x80 - 0x8F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x90 - 0x9F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xA0 - 0xAF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xB0 - 0xBF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xC0 - 0xCF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xD0 - 0xDF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xE0 - 0xEF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   /* 0xF0 - 0xFF */
};
|
|
|
|
/*
  256-entry lower case map referenced as "to_lower" by the ucs2
  charset_info_st definitions. Entries 65..90 ('A'..'Z') hold 97..122
  ('a'..'z'); every other entry holds its own index.
*/
static const uchar to_lower_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  /* 0x00 - 0x0F */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,  /* 0x10 - 0x1F */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,  /* 0x20 - 0x2F */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  /* 0x30 - 0x3F */
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,  /* 0x40 - 0x4F */
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,  /* 0x50 - 0x5F */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,  /* 0x60 - 0x6F */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,  /* 0x70 - 0x7F */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,  /* 0x80 - 0x8F */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,  /* 0x90 - 0x9F */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,  /* 0xA0 - 0xAF */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,  /* 0xB0 - 0xBF */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,  /* 0xC0 - 0xCF */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,  /* 0xD0 - 0xDF */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,  /* 0xE0 - 0xEF */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255   /* 0xF0 - 0xFF */
};
|
|
|
|
/*
  256-entry upper case map referenced as "to_upper" (and, for the
  general collations, as "sort_order") by the ucs2 charset_info_st
  definitions. Entries 97..122 ('a'..'z') hold 65..90 ('A'..'Z');
  every other entry holds its own index.
*/
static const uchar to_upper_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  /* 0x00 - 0x0F */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,  /* 0x10 - 0x1F */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,  /* 0x20 - 0x2F */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  /* 0x30 - 0x3F */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  /* 0x40 - 0x4F */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,  /* 0x50 - 0x5F */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  /* 0x60 - 0x6F */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,  /* 0x70 - 0x7F */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,  /* 0x80 - 0x8F */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,  /* 0x90 - 0x9F */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,  /* 0xA0 - 0xAF */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,  /* 0xB0 - 0xBF */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,  /* 0xC0 - 0xCF */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,  /* 0xD0 - 0xDF */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,  /* 0xE0 - 0xEF */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255   /* 0xF0 - 0xFF */
};
|
|
|
|
|
|
/* Definitions for strcoll.ic */

/* Always true: any pair of bytes is accepted as a 2-byte character */
#define IS_MB2_CHAR(x,y)                (1)

/*
  Combine two bytes, high byte first, into one 16-bit code.
  Each argument is parenthesized before the cast so that an expression
  argument is truncated to a byte as a whole rather than only its
  first operand.
*/
#define UCS2_CODE(b0,b1)                (((uchar) (b0)) << 8 | ((uchar) (b1)))
|
|
|
|
|
|
/*
  Sort weight of the 2-byte character (b0, b1) for ucs2_general_ci.

  The code is looked up in my_unicase_default_pages: when a page exists
  for the high byte the "sort" member of the character's entry is the
  weight, otherwise the code itself is returned.
*/
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
{
  my_wc_t code= UCS2_CODE(b0, b1);
  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[code >> 8];

  if (page)
    return (int) page[code & 0xFF].sort;
  return (int) code;
}
|
|
|
|
|
|
/*
  The template strcoll.ic is included four times below. Each inclusion
  is preceded by the parameter macros it consumes; MY_FUNCTION_NAME()
  sets the suffix of the generated function names.
  NOTE(review): the macros are redefined before every inclusion without
  an #undef here, so strcoll.ic presumably undefines them itself -
  confirm in strcoll.ic.
*/

/* 1: names ending in _ucs2_general_ci (weights from the unicase table) */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.ic"


/* 2: names ending in _ucs2_bin (weight is the code itself) */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_bin
#define DEFINE_STRNXFRM_UNICODE_BIN2
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        UCS2_CODE(b0,b1)
#include "strcoll.ic"


/* 3: names ending in _ucs2_general_nopad_ci */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.ic"


/* 4: names ending in _ucs2_nopad_bin */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        UCS2_CODE(b0,b1)
#include "strcoll.ic"
|
|
|
|
|
|
static int
|
|
my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const uchar *s, const uchar *e)
|
|
{
|
|
return s + 2 > e ? MY_CS_TOOSMALLN(2) : 2;
|
|
}
|
|
|
|
|
|
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|
my_wc_t * pwc, const uchar *s, const uchar *e)
|
|
{
|
|
return my_mb_wc_ucs2_quick(pwc, s, e);
|
|
}
|
|
|
|
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
|
my_wc_t wc, uchar *r, uchar *e)
|
|
{
|
|
if ( r+2 > e )
|
|
return MY_CS_TOOSMALL2;
|
|
|
|
if (wc > 0xFFFF) /* UCS2 does not support characters outside BMP */
|
|
return MY_CS_ILUNI;
|
|
|
|
r[0]= (uchar) (wc >> 8);
|
|
r[1]= (uchar) (wc & 0xFF);
|
|
return 2;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
|
|
*wc= page[*wc & 0xFF].tolower;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
|
|
*wc= page[*wc & 0xFF].toupper;
|
|
}
|
|
|
|
|
|
static inline void
|
|
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
|
|
{
|
|
MY_UNICASE_CHARACTER *page;
|
|
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
|
|
*wc= page[*wc & 0xFF].sort;
|
|
}
|
|
|
|
/*
  Convert a ucs2 string to upper case.

  Characters are decoded from src one by one, mapped with
  my_toupper_ucs2() and encoded into dst. The loop stops at the end of
  the source, when decoding fails, or when the encoded length differs
  from the decoded length.

  @param cs      charset; its caseinfo supplies the case mapping
  @param src     source string
  @param srclen  source length in bytes
  @param dst     destination buffer
  @param dstlen  destination size in bytes (asserted >= srclen)
  @return srclen, regardless of where the loop stopped
*/
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *src_stop= src + srclen;
  char *dst_stop= dst + dstlen;
  MY_UNICASE_INFO *casemap= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < src_stop)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar *) src, (uchar*) src_stop);
    if (nbytes <= 0)
      break;
    my_toupper_ucs2(casemap, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dst_stop) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
my_wc_t wc;
|
|
int res;
|
|
const uchar *e=s+slen;
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
register ulong m1= *nr1, m2= *nr2;
|
|
|
|
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
|
|
{
|
|
my_tosort_ucs2(uni_plane, &wc);
|
|
MY_HASH_ADD_16(m1, m2, wc);
|
|
s+=res;
|
|
}
|
|
*nr1= m1;
|
|
*nr2= m2;
|
|
}
|
|
|
|
|
|
/*
  Hash a ucs2 string by sort weight: the length is first reduced to the
  value returned by my_lengthsp_mb2() (presumably the length without
  trailing spaces - confirm there), then my_hash_sort_ucs2_nopad()
  hashes that prefix.
*/
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
                              ulong *nr1, ulong *nr2)
{
  size_t hashed_len= my_lengthsp_mb2(cs, (const char *) s, slen);

  my_hash_sort_ucs2_nopad(cs, s, hashed_len, nr1, nr2);
}
|
|
|
|
/*
  Convert a ucs2 string to lower case.

  Characters are decoded from src one by one, mapped with
  my_tolower_ucs2() and encoded into dst. The loop stops at the end of
  the source, when decoding fails, or when the encoded length differs
  from the decoded length.

  @param cs      charset; its caseinfo supplies the case mapping
  @param src     source string
  @param srclen  source length in bytes
  @param dst     destination buffer
  @param dstlen  destination size in bytes (asserted >= srclen)
  @return srclen, regardless of where the loop stopped
*/
static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *src_stop= src + srclen;
  char *dst_stop= dst + dstlen;
  MY_UNICASE_INFO *casemap= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < src_stop)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) src_stop);
    if (nbytes <= 0)
      break;
    my_tolower_ucs2(casemap, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dst_stop) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
|
|
|
|
|
|
static void
|
|
my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|
char *s, size_t l, int fill)
|
|
{
|
|
DBUG_ASSERT(fill <= 0xFFFF);
|
|
#ifdef WAITING_FOR_GCC_VECTORIZATION_BUG_TO_BE_FIXED
|
|
/*
|
|
This code with int2store() is known to be faster on some processors,
|
|
but crashes on other processors due to a possible bug in GCC's
|
|
-ftree-vectorization (which is enabled in -O3) in case of
|
|
a non-aligned memory. See here for details:
|
|
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58039
|
|
*/
|
|
char *last= s + l - 2;
|
|
uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */
|
|
DBUG_ASSERT(fill <= 0xFFFF);
|
|
for ( ; s <= last; s+= 2)
|
|
int2store(s, tmp); /* store little-endian */
|
|
#else
|
|
for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
|
|
#endif
|
|
}
|
|
|
|
|
|
static
|
|
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *b, const char *e)
|
|
{
|
|
return (size_t) (e-b)/2;
|
|
}
|
|
|
|
|
|
static
|
|
size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *b __attribute__((unused)),
|
|
const char *e __attribute__((unused)),
|
|
size_t pos)
|
|
{
|
|
size_t string_length= (size_t) (e - b);
|
|
return pos > string_length ? string_length + 2 : pos * 2;
|
|
}
|
|
|
|
|
|
static size_t
|
|
my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|
const char *b, const char *e,
|
|
size_t nchars, MY_STRCOPY_STATUS *status)
|
|
{
|
|
size_t length= e - b;
|
|
if (nchars * 2 <= length)
|
|
{
|
|
status->m_well_formed_error_pos= NULL;
|
|
status->m_source_end_pos= b + (nchars * 2);
|
|
return nchars;
|
|
}
|
|
if (length % 2)
|
|
{
|
|
status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
|
|
}
|
|
else
|
|
{
|
|
status->m_well_formed_error_pos= NULL;
|
|
status->m_source_end_pos= e;
|
|
}
|
|
return length / 2;
|
|
}
|
|
|
|
|
|
static
|
|
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
|
|
const char *str,const char *str_end,
|
|
const char *wildstr,const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
|
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
|
|
escape,w_one,w_many,uni_plane);
|
|
}
|
|
|
|
|
|
static
|
|
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
|
|
const char *str,const char *str_end,
|
|
const char *wildstr,const char *wildend,
|
|
int escape, int w_one, int w_many)
|
|
{
|
|
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
|
|
escape,w_one,w_many,NULL);
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|
const uchar *key, size_t len,
|
|
ulong *nr1, ulong *nr2)
|
|
{
|
|
const uchar *end= key + len;
|
|
register ulong m1= *nr1, m2= *nr2;
|
|
for ( ; key < end ; key++)
|
|
{
|
|
MY_HASH_ADD(m1, m2, (uint)*key);
|
|
}
|
|
*nr1= m1;
|
|
*nr2= m2;
|
|
}
|
|
|
|
|
|
static void
|
|
my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
|
|
const uchar *key, size_t len, ulong *nr1, ulong *nr2)
|
|
{
|
|
size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
|
|
my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
|
|
}
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_ucs2_general_ci,
|
|
my_strnncollsp_ucs2_general_ci,
|
|
my_strnxfrm_ucs2_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_ucs2_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_ucs2,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_ucs2_bin,
|
|
my_strnncollsp_ucs2_bin,
|
|
my_strnxfrm_ucs2_bin,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_ucs2_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_ucs2_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_ucs2_general_ci,
|
|
my_strnncollsp_ucs2_general_nopad_ci,
|
|
my_strnxfrm_nopad_ucs2_general_ci,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_ucs2_ci,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_ucs2_nopad,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
|
|
{
|
|
NULL, /* init */
|
|
my_strnncoll_ucs2_bin,
|
|
my_strnncollsp_ucs2_nopad_bin,
|
|
my_strnxfrm_nopad_ucs2_bin,
|
|
my_strnxfrmlen_unicode,
|
|
my_like_range_generic,
|
|
my_wildcmp_ucs2_bin,
|
|
my_strcasecmp_mb2_or_mb4,
|
|
my_instr_mb,
|
|
my_hash_sort_ucs2_nopad_bin,
|
|
my_propagate_simple
|
|
};
|
|
|
|
|
|
MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
|
{
|
|
NULL, /* init */
|
|
my_numchars_ucs2,
|
|
my_charpos_ucs2,
|
|
my_lengthsp_mb2,
|
|
my_numcells_mb,
|
|
my_ucs2_uni, /* mb_wc */
|
|
my_uni_ucs2, /* wc_mb */
|
|
my_mb_ctype_mb,
|
|
my_caseup_str_mb2_or_mb4,
|
|
my_casedn_str_mb2_or_mb4,
|
|
my_caseup_ucs2,
|
|
my_casedn_ucs2,
|
|
my_snprintf_mb2,
|
|
my_l10tostr_mb2_or_mb4,
|
|
my_ll10tostr_mb2_or_mb4,
|
|
my_fill_ucs2,
|
|
my_strntol_mb2_or_mb4,
|
|
my_strntoul_mb2_or_mb4,
|
|
my_strntoll_mb2_or_mb4,
|
|
my_strntoull_mb2_or_mb4,
|
|
my_strntod_mb2_or_mb4,
|
|
my_strtoll10_mb2,
|
|
my_strntoull10rnd_mb2_or_mb4,
|
|
my_scan_mb2,
|
|
my_charlen_ucs2,
|
|
my_well_formed_char_length_ucs2,
|
|
my_copy_fix_mb2_or_mb4,
|
|
my_uni_ucs2,
|
|
my_wc_to_printable_generic
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_ucs2_general_ci=
|
|
{
|
|
35,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_ucs2, charset_name_ucs2_length}, /* cs name */
|
|
{ STRING_WITH_LEN("ucs2_general_ci") }, /* name */
|
|
"", /* comment */
|
|
NULL, /* tailoring */
|
|
ctype_ucs2, /* ctype */
|
|
to_lower_ucs2, /* to_lower */
|
|
to_upper_ucs2, /* to_upper */
|
|
to_upper_ucs2, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default,/* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
2, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_ucs2_handler,
|
|
&my_collation_ucs2_general_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_ucs2_general_mysql500_ci=
|
|
{
|
|
159, 0, 0, /* number */
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
|
|
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
|
|
{ STRING_WITH_LEN("ucs2_general_mysql500_ci") }, /* name */
|
|
"", /* comment */
|
|
NULL, /* tailoring */
|
|
ctype_ucs2, /* ctype */
|
|
to_lower_ucs2, /* to_lower */
|
|
to_upper_ucs2, /* to_upper */
|
|
to_upper_ucs2, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_mysql500, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
2, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_ucs2_handler,
|
|
&my_collation_ucs2_general_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_ucs2_bin=
|
|
{
|
|
90,0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
|
|
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
|
|
{ STRING_WITH_LEN("ucs2_bin") }, /* name */
|
|
"", /* comment */
|
|
NULL, /* tailoring */
|
|
ctype_ucs2, /* ctype */
|
|
to_lower_ucs2, /* to_lower */
|
|
to_upper_ucs2, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default,/* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
2, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_ucs2_handler,
|
|
&my_collation_ucs2_bin_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_ucs2_general_nopad_ci=
|
|
{
|
|
MY_NOPAD_ID(35),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
|
|
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
|
|
{ STRING_WITH_LEN("ucs2_general_nopad_ci") }, /* name */
|
|
"", /* comment */
|
|
NULL, /* tailoring */
|
|
ctype_ucs2, /* ctype */
|
|
to_lower_ucs2, /* to_lower */
|
|
to_upper_ucs2, /* to_upper */
|
|
to_upper_ucs2, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
2, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_ucs2_handler,
|
|
&my_collation_ucs2_general_nopad_ci_handler
|
|
};
|
|
|
|
|
|
struct charset_info_st my_charset_ucs2_nopad_bin=
|
|
{
|
|
MY_NOPAD_ID(90),0,0, /* number */
|
|
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
|
|
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
|
|
{ STRING_WITH_LEN("ucs2_nopad_bin") }, /* name */
|
|
"", /* comment */
|
|
NULL, /* tailoring */
|
|
ctype_ucs2, /* ctype */
|
|
to_lower_ucs2, /* to_lower */
|
|
to_upper_ucs2, /* to_upper */
|
|
NULL, /* sort_order */
|
|
NULL, /* uca */
|
|
NULL, /* tab_to_uni */
|
|
NULL, /* tab_from_uni */
|
|
&my_unicase_default, /* caseinfo */
|
|
NULL, /* state_map */
|
|
NULL, /* ident_map */
|
|
1, /* strxfrm_multiply */
|
|
1, /* caseup_multiply */
|
|
1, /* casedn_multiply */
|
|
2, /* mbminlen */
|
|
2, /* mbmaxlen */
|
|
0, /* min_sort_char */
|
|
0xFFFF, /* max_sort_char */
|
|
' ', /* pad char */
|
|
0, /* escape_with_backslash_is_dangerous */
|
|
1, /* levels_for_order */
|
|
&my_charset_ucs2_handler,
|
|
&my_collation_ucs2_nopad_bin_handler
|
|
};
|
|
|
|
#endif /* HAVE_CHARSET_ucs2 */
|