mariadb/strings/ctype-ucs2.c
Alexander Barkov 133446828c MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
  Unicode Collation Algorithm version 14.0.0.

  Collations were added for Unicode character sets
  utf8mb3, utf8mb4, ucs2, utf16, utf32.

  Every tailoring was added with four accent and case
  sensitivity flag combinations, e.g:

  * utf8mb4_uca1400_swedish_as_cs
  * utf8mb4_uca1400_swedish_as_ci
  * utf8mb4_uca1400_swedish_ai_cs
  * utf8mb4_uca1400_swedish_ai_ci

  and their _nopad_ variants:

  * utf8mb4_uca1400_swedish_nopad_as_cs
  * utf8mb4_uca1400_swedish_nopad_as_ci
  * utf8mb4_uca1400_swedish_nopad_ai_cs
  * utf8mb4_uca1400_swedish_nopad_ai_ci

- Introducing the concept of contextually typed named collations:

  CREATE DATABASE db1 CHARACTER SET utf8mb4;
  CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);

  The idea is that there is no need to specify the character set prefix
  in the new collation names. It's enough to type just the suffix
  "uca1400_as_ci". The character set is taken from the context.

  In the above example script the context character set is utf8mb4.
  So the CREATE TABLE will make a column with the collation
  utf8mb4_uca1400_as_ci.

  Short collation names can be used in any part of the SQL syntax
  where the COLLATE clause is understood.

- New collations are displayed only one time
  (without character set combinations) by these statements:

     SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
     SHOW COLLATION;

  For example, all these collations:
  - utf8mb3_uca1400_swedish_as_ci
  - utf8mb4_uca1400_swedish_as_ci
  - ucs2_uca1400_swedish_as_ci
  - utf16_uca1400_swedish_as_ci
  - utf32_uca1400_swedish_as_ci
  have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
  with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
  without the character set name:

SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';

+-----------------------+
| COLLATION_NAME        |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+

  Note, the behaviour of old collations did not change.
  Non-unicode collations (e.g. latin1_swedish_ci) and
  old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
  are still displayed with the character set prefix, as before.

- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.

  The NOT NULL constraint was removed from these columns:
  - CHARACTER_SET_NAME
  - ID
  - IS_DEFAULT
  and from the corresponding columns in SHOW COLLATION.

  For example:

SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME        | CHARACTER_SET_NAME | ID   | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL               | NULL | NULL       |
+-----------------------+--------------------+------+------------+

  The NULL value in these columns now means that the collation
  is applicable to multiple character sets.
  The behaviour of old collations did not change.
  Make sure your client programs can handle NULL values in these columns.

- The structure of the table
  INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.

  Three new NOT NULL columns were added:
  - FULL_COLLATION_NAME
  - ID
  - IS_DEFAULT

  New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
  The column COLLATION_NAME contains the collation name without the character
  set prefix. The column FULL_COLLATION_NAME contains the collation name with
  the character set prefix.

  Old collations have full collation name in both FULL_COLLATION_NAME and
  COLLATION_NAME.

SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME              | FULL_COLLATION_NAME                 | CHARACTER_SET_NAME | ID   | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci           | latin1_swedish_ci                   | latin1             |    8 | Yes        |
| latin1_swedish_nopad_ci     | latin1_swedish_nopad_ci             | latin1             | 1032 |            |
| utf8mb4_swedish_ci          | utf8mb4_swedish_ci                  | utf8mb4            |  232 |            |
| uca1400_swedish_ai_ci       | utf8mb4_uca1400_swedish_ai_ci       | utf8mb4            | 2368 |            |
| uca1400_swedish_as_ci       | utf8mb4_uca1400_swedish_as_ci       | utf8mb4            | 2370 |            |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4            | 2372 |            |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4            | 2374 |            |
+-----------------------------+-------------------------------------+--------------------+------+------------+

- Other INFORMATION_SCHEMA queries:

  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
  SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
  SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;

  display full collation names, including the character set prefix,
  for all collations, including new collations.

  Corresponding SHOW commands also display full collation names
  in collation related columns:

  SHOW CREATE TABLE t1;
  SHOW CREATE DATABASE db1;
  SHOW TABLE STATUS;
  SHOW CREATE FUNCTION f1;
  SHOW CREATE PROCEDURE p1;
  SHOW CREATE EVENT ev1;
  SHOW CREATE TRIGGER tr1;
  SHOW CREATE VIEW;

  These INFORMATION_SCHEMA queries and SHOW statements may change in
  the future, to display short collation names.
2022-08-10 15:04:24 +02:00

3605 lines
99 KiB
C

/* Copyright (c) 2003, 2013, Oracle and/or its affiliates
Copyright (c) 2009, 2020, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; version 2
of the License.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1335 USA */
/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
#include "strings_def.h"
#include <m_ctype.h>
#include "ctype-mb.h"
#include <my_sys.h>
#include <stdarg.h>
#include "ctype-unidata.h"
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
#define HAVE_CHARSET_mb2
#endif
#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
#define HAVE_CHARSET_mb2_or_mb4
#endif
#ifndef EILSEQ
#define EILSEQ ENOENT
#endif
/*
  Numeric limits and powers of ten used by the string-to-integer
  converters in this file (see my_strtoll10_mb2).
*/
#undef ULONGLONG_MAX
/* Largest value representable in ulonglong (all bits set). */
#define ULONGLONG_MAX (~(ulonglong) 0)
/* Magnitude of the most negative 64-bit signed value, i.e. 2^63. */
#define MAX_NEGATIVE_NUMBER ((ulonglong) 0x8000000000000000LL)
/* Number of decimal digits accumulated per chunk by my_strtoll10_mb2. */
#define INIT_CNT 9
/* 10^9: scales the first chunk when it is followed by 9 more digits. */
#define LFACTOR 1000000000ULL
/* 10^10: scales the first chunk when it is followed by 10 more digits. */
#define LFACTOR1 10000000000ULL
/* 10^11: scales the first chunk when it is followed by 11 more digits. */
#define LFACTOR2 100000000000ULL
#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2)
/* lfactor[n] == 10^n for n in 0..8; indexed by a digit count. */
static unsigned long lfactor[9]=
{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
#endif
#ifdef HAVE_CHARSET_mb2_or_mb4
/*
  Placeholder for the in-place "case up a string" handler.

  It is not expected to be called for these character sets: it asserts
  in debug builds and reports zero otherwise.

  @param cs  Character set (unused)
  @param s   The string (unused)
  @return    Always 0
*/
static size_t
my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)),
                         char * s __attribute__((unused)))
{
  DBUG_ASSERT(0);                     /* Must never be reached */
  return (size_t) 0;
}
/*
  Placeholder for the in-place "case down a string" handler.

  It is not expected to be called for these character sets: it asserts
  in debug builds and reports zero otherwise.

  @param cs  Character set (unused)
  @param s   The string (unused)
  @return    Always 0
*/
static size_t
my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
                         char * s __attribute__((unused)))
{
  DBUG_ASSERT(0);                     /* Must never be reached */
  return (size_t) 0;
}
/*
  Placeholder for the case-insensitive string comparison handler.

  It is not expected to be called for these character sets: it asserts
  in debug builds and reports "equal" (0) otherwise.

  @param cs  Character set (unused)
  @param s   Left operand (unused)
  @param t   Right operand (unused)
  @return    Always 0
*/
static int
my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
                         const char *s __attribute__((unused)),
                         const char *t __attribute__((unused)))
{
  DBUG_ASSERT(0);                     /* Must never be reached */
  return (int) 0;
}
/* Outcome of copying a single, possibly incomplete, character. */
typedef enum
{
  /* The character was valid as it is */
  MY_CHAR_COPY_OK= 0,
  /* The character was not valid and could not be fixed */
  MY_CHAR_COPY_ERROR= 1,
  /* The character was not valid and was replaced with '?' */
  MY_CHAR_COPY_FIXED= 2
} my_char_copy_status_t;
/*
  Copy an incomplete character, left-padding it with 0x00 bytes.

  The incomplete character is the first (src_length % cs->mbminlen)
  bytes of "src". It is written to "dst" as one cs->mbminlen-byte unit:
  zero bytes first, then the source bytes.

  @param cs          Character set
  @param dst         The destination string
  @param dst_length  Space available in dst
  @param src         The source string
  @param src_length  Length of src
  @param nchars      The "nchars" parameter of the caller.
                     Only 0 and non-0 matter here.
  @param fix         What to do if the zero-padded bytes do not make
                     a valid character:
                     - FALSE - exit with error.
                     - TRUE  - try to put '?' instead.

  @return MY_CHAR_COPY_OK     The padded bytes form a valid character.
                              cs->mbminlen bytes were written to "dst".
  @return MY_CHAR_COPY_FIXED  The padded bytes were not a valid character,
                              and '?' was stored in "dst" instead.
                              cs->mbminlen bytes were written to "dst".
  @return MY_CHAR_COPY_ERROR  Failure. Possible reasons:
                              - dst_length was too short
                              - nchars was 0
                              - the padded character was not valid and
                                could not be replaced with '?'
                                (or "fix" was FALSE).
*/
static my_char_copy_status_t
my_copy_incomplete_char(CHARSET_INFO *cs,
                        char *dst, size_t dst_length,
                        const char *src, size_t src_length,
                        size_t nchars, my_bool fix)
{
  const size_t unit_length= cs->mbminlen;
  const size_t partial_length= src_length % unit_length;
  const size_t zero_length= unit_length - partial_length;
  uchar *unit, *unit_end;

  if (!nchars || dst_length < unit_length)
    return MY_CHAR_COPY_ERROR;

  unit= (uchar *) dst;
  unit_end= unit + unit_length;

  bzero(dst, zero_length);
  memmove(dst + zero_length, src, partial_length);

  /*
    Left zero-padding can produce an invalid character, e.g.:
      INSERT INTO t1 (utf32_column) VALUES (0x110000);
    is padded to 0x00110000, which is outside of the valid UTF32 range
    0x00000000..0x0010FFFF. So the result has to be validated.
  */
  if (my_ci_charlen(cs, unit, unit_end) == (int) unit_length)
    return MY_CHAR_COPY_OK;

  if (!fix)
    return MY_CHAR_COPY_ERROR;

  /* Overwrite the bad unit with '?', if '?' takes exactly one unit */
  if (my_ci_wc_mb(cs, '?', unit, unit_end) == (int) unit_length)
    return MY_CHAR_COPY_FIXED;

  return MY_CHAR_COPY_ERROR;
}
/*
  Copy a UCS2/UTF16/UTF32 string, fixing bad characters.

  When src_length is not a multiple of cs->mbminlen, the leading
  incomplete character is zero-padded (or replaced with '?') by
  my_copy_incomplete_char(), and the rest is copied by my_copy_fix_mb().

  @param cs          Character set
  @param dst         The destination string
  @param dst_length  Space available in dst
  @param src         The source string
  @param src_length  Length of src
  @param nchars      Copy not more than nchars characters
  @param status      Copy status, filled by my_copy_fix_mb() and adjusted
                     here when the leading character had to be fixed
  @return            Number of bytes written to dst
*/
static size_t
my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
                       char *dst, size_t dst_length,
                       const char *src, size_t src_length,
                       size_t nchars, MY_STRCOPY_STATUS *status)
{
  const size_t head_length= src_length % cs->mbminlen;
  my_char_copy_status_t head_status;
  size_t tail_written;

  /* The common case: no incomplete leading character */
  if (head_length == 0)
    return my_copy_fix_mb(cs, dst, dst_length,
                          src, src_length, nchars, status);

  head_status= my_copy_incomplete_char(cs, dst, dst_length,
                                       src, src_length, nchars, TRUE);
  if (head_status == MY_CHAR_COPY_ERROR)
  {
    /* Nothing was copied: both positions point to the very beginning */
    status->m_well_formed_error_pos= src;
    status->m_source_end_pos= src;
    return 0;
  }

  /* Copy everything after the incomplete leading character */
  tail_written= my_copy_fix_mb(cs,
                               dst + cs->mbminlen, dst_length - cs->mbminlen,
                               src + head_length, src_length - head_length,
                               nchars - 1, status);

  /* The leading character was replaced: report the error at the start */
  if (head_status == MY_CHAR_COPY_FIXED)
    status->m_well_formed_error_pos= src;

  return cs->mbminlen /* The left-padded character */ + tail_written;
}
/*
  Convert a UCS2/UTF16/UTF32 string to a signed integer limited to the
  32-bit range INT_MIN32..INT_MAX32.

  Leading spaces, tabs, '+' and '-' characters are skipped in any order;
  every '-' toggles the sign. Digits are '0'..'9', 'A'..'Z' and 'a'..'z'
  (letters stand for 10..35); parsing stops at the first character which
  is not a digit in the given base.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param l       Length of the string in bytes
  @param base    Radix
  @param endptr  If not NULL, receives the position where parsing stopped
  @param err     Receives 0 on success, or:
                 - EILSEQ if a bad multi-byte sequence was found
                 - EDOM   if the string ended before any digit, or no
                          character was consumed by the digit loop
                 - ERANGE if the value is out of range
  @return        The converted value, 0 on EILSEQ/EDOM,
                 INT_MIN32 or INT_MAX32 on ERANGE

  NOTE(review): the digit loop advances "s" before checking whether the
  character is a valid digit, so *endptr points after the terminating
  non-digit character and "s == save" does not catch a string that starts
  with a non-digit. This is the pre-existing behaviour and is left
  unchanged here; confirm what callers expect before changing it.
*/
static long
my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
                      const char *nptr, size_t l, int base,
                      char **endptr, int *err)
{
  int      negative= 0;
  int      overflow;
  int      cnv;
  my_wc_t  wc;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
  register unsigned int cutlim;
  register uint32 cutoff;
  register uint32 res;
  register const uchar *s= (const uchar*) nptr;
  register const uchar *e= (const uchar*) nptr+l;
  const uchar *save;

  *err= 0;

  /* Skip leading spaces, tabs and signs */
  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      switch (wc)
      {
        case ' ' : break;
        case '\t': break;
        case '-' : negative= !negative; break;
        case '+' : break;
        default  : goto bs;
      }
    }
    else /* No more characters or bad multibyte sequence */
    {
      if (endptr != NULL )
        *endptr= (char*) s;
      err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
      return 0;
    }
    s+= cnv;
  } while (1);

bs:

  overflow= 0;
  res= 0;
  save= s;
  /* res may be multiplied by base only while res <= cutoff */
  cutoff= ((uint32)~0L) / (uint32) base;
  cutlim= (uint) (((uint32)~0L) % (uint32) base);

  do {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      s+= cnv;
      if (wc >= '0' && wc <= '9')
        wc-= '0';
      else if (wc >= 'A' && wc <= 'Z')
        wc= wc - 'A' + 10;
      else if (wc >= 'a' && wc <= 'z')
        wc= wc - 'a' + 10;
      else
        break;
      if ((int)wc >= base)
        break;
      if (res > cutoff || (res == cutoff && wc > cutlim))
        overflow= 1;
      else
      {
        res*= (uint32) base;
        res+= wc;
      }
    }
    else if (cnv == MY_CS_ILSEQ)
    {
      if (endptr !=NULL )
        *endptr = (char*) s;
      err[0]= EILSEQ;
      return 0;
    }
    else
    {
      /* No more characters */
      break;
    }
  } while(1);

  if (endptr != NULL)
    *endptr = (char *) s;

  if (s == save)
  {
    err[0]= EDOM;
    return 0L;
  }

  /* The magnitude must fit into the signed 32-bit range */
  if (negative)
  {
    if (res > (uint32) INT_MIN32)
      overflow= 1;
  }
  else if (res > INT_MAX32)
    overflow= 1;

  if (overflow)
  {
    err[0]= ERANGE;
    return negative ? INT_MIN32 : INT_MAX32;
  }

  /*
    Negate in a 64-bit type: with a 32-bit "long", -((long) res) is
    a signed overflow (undefined behaviour) for res == 0x80000000.
  */
  return negative ? (long) -((longlong) res) : (long) res;
}
/*
  Convert a UCS2/UTF16/UTF32 string to an unsigned integer limited to
  the 32-bit range.

  Leading spaces, tabs, '+' and '-' characters are skipped in any order;
  every '-' toggles the sign. Digits are '0'..'9', 'A'..'Z' and 'a'..'z'
  (letters stand for 10..35); parsing stops at the first character which
  is not a digit in the given base.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param l       Length of the string in bytes
  @param base    Radix
  @param endptr  If not NULL, receives the position where parsing stopped
  @param err     Receives 0 on success, or:
                 - EILSEQ if a bad multi-byte sequence was found
                 - EDOM   if the string ended before any digit, or no
                          character was consumed by the digit loop
                 - ERANGE if the magnitude does not fit into 32 bits
  @return        The converted value (negated in unsigned arithmetic if
                 the sign was negative), 0 on EILSEQ/EDOM,
                 0xFFFFFFFF on ERANGE

  NOTE(review): the digit loop advances "s" before checking whether the
  character is a valid digit, so *endptr points after the terminating
  non-digit character. Pre-existing behaviour, left unchanged.
*/
static ulong
my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
                       const char *nptr, size_t l, int base,
                       char **endptr, int *err)
{
  int      negative= 0;
  int      overflow;
  int      cnv;
  my_wc_t  wc;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
  register unsigned int cutlim;
  register uint32 cutoff;
  register uint32 res;
  register const uchar *s= (const uchar*) nptr;
  register const uchar *e= (const uchar*) nptr + l;
  const uchar *save;

  *err= 0;

  /* Skip leading spaces, tabs and signs */
  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      switch (wc)
      {
        case ' ' : break;
        case '\t': break;
        case '-' : negative= !negative; break;
        case '+' : break;
        default  : goto bs;
      }
    }
    else /* No more characters or bad multibyte sequence */
    {
      if (endptr !=NULL )
        *endptr= (char*)s;
      err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
      return 0;
    }
    s+= cnv;
  } while (1);

bs:

  overflow= 0;
  res= 0;
  save= s;
  /* res may be multiplied by base only while res <= cutoff */
  cutoff= ((uint32)~0L) / (uint32) base;
  cutlim= (uint) (((uint32)~0L) % (uint32) base);

  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      s+= cnv;
      if (wc >= '0' && wc <= '9')
        wc-= '0';
      else if (wc >= 'A' && wc <= 'Z')
        wc= wc - 'A' + 10;
      else if (wc >= 'a' && wc <= 'z')
        wc= wc - 'a' + 10;
      else
        break;
      if ((int) wc >= base)
        break;
      if (res > cutoff || (res == cutoff && wc > cutlim))
        overflow = 1;
      else
      {
        res*= (uint32) base;
        res+= wc;
      }
    }
    else if (cnv == MY_CS_ILSEQ)
    {
      if (endptr != NULL )
        *endptr= (char*)s;
      err[0]= EILSEQ;
      return 0;
    }
    else
    {
      /* No more characters */
      break;
    }
  } while(1);

  if (endptr != NULL)
    *endptr= (char *) s;

  if (s == save)
  {
    err[0]= EDOM;
    return 0L;
  }

  if (overflow)
  {
    err[0]= (ERANGE);
    return (~(uint32) 0);
  }

  /*
    Negate in unsigned arithmetic: it gives the same bit pattern as
    -((long) res), but without a signed overflow (undefined behaviour)
    for res == 0x80000000 when "long" is 32-bit.
  */
  return negative ? (ulong) 0 - (ulong) res : (ulong) res;
}
/*
  Convert a UCS2/UTF16/UTF32 string to a signed 64-bit integer.

  Leading spaces, tabs, '+' and '-' characters are skipped in any order;
  every '-' toggles the sign. Digits are '0'..'9', 'A'..'Z' and 'a'..'z'
  (letters stand for 10..35); parsing stops at the first character which
  is not a digit in the given base.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param l       Length of the string in bytes
  @param base    Radix
  @param endptr  If not NULL, receives the position where parsing stopped
  @param err     Receives 0 on success, or:
                 - EILSEQ if a bad multi-byte sequence was found
                 - EDOM   if the string ended before any digit, or no
                          character was consumed by the digit loop
                 - ERANGE if the value is out of range
  @return        The converted value, 0 on EILSEQ/EDOM,
                 LONGLONG_MIN or LONGLONG_MAX on ERANGE

  NOTE(review): the digit loop advances "s" before checking whether the
  character is a valid digit, so *endptr points after the terminating
  non-digit character. Pre-existing behaviour, left unchanged.
*/
static longlong
my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
                       const char *nptr, size_t l, int base,
                       char **endptr, int *err)
{
  int      negative=0;
  int      overflow;
  int      cnv;
  my_wc_t  wc;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
  register ulonglong    cutoff;
  register unsigned int cutlim;
  register ulonglong    res;
  register const uchar *s= (const uchar*) nptr;
  register const uchar *e= (const uchar*) nptr+l;
  const uchar *save;

  *err= 0;

  /* Skip leading spaces, tabs and signs */
  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      switch (wc)
      {
        case ' ' : break;
        case '\t': break;
        case '-' : negative= !negative; break;
        case '+' : break;
        default  : goto bs;
      }
    }
    else /* No more characters or bad multibyte sequence */
    {
      if (endptr !=NULL )
        *endptr = (char*)s;
      err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
      return 0;
    }
    s+=cnv;
  } while (1);

bs:

  overflow = 0;
  res = 0;
  save = s;
  /* res may be multiplied by base only while res <= cutoff */
  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);

  do {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      s+=cnv;
      if ( wc>='0' && wc<='9')
        wc -= '0';
      else if ( wc>='A' && wc<='Z')
        wc = wc - 'A' + 10;
      else if ( wc>='a' && wc<='z')
        wc = wc - 'a' + 10;
      else
        break;
      if ((int)wc >= base)
        break;
      if (res > cutoff || (res == cutoff && wc > cutlim))
        overflow = 1;
      else
      {
        res *= (ulonglong) base;
        res += wc;
      }
    }
    else if (cnv==MY_CS_ILSEQ)
    {
      if (endptr !=NULL )
        *endptr = (char*)s;
      err[0]=EILSEQ;
      return 0;
    }
    else
    {
      /* No more characters */
      break;
    }
  } while(1);

  if (endptr != NULL)
    *endptr = (char *) s;

  if (s == save)
  {
    err[0]=EDOM;
    return 0L;
  }

  /* The magnitude must fit into the signed 64-bit range */
  if (negative)
  {
    if (res  > (ulonglong) LONGLONG_MIN)
      overflow = 1;
  }
  else if (res > (ulonglong) LONGLONG_MAX)
    overflow = 1;

  if (overflow)
  {
    err[0]=ERANGE;
    return negative ? LONGLONG_MIN : LONGLONG_MAX;
  }

  if (!negative)
    return (longlong) res;

  /*
    A magnitude of 2^63 is exactly LONGLONG_MIN. Return it directly:
    -((longlong) res) would be a signed overflow (undefined behaviour).
  */
  if (res == (ulonglong) LONGLONG_MIN)
    return LONGLONG_MIN;
  return -((longlong) res);
}
/*
  Convert a UCS2/UTF16/UTF32 string to an unsigned 64-bit integer.

  Leading spaces, tabs, '+' and '-' characters are skipped in any order;
  every '-' toggles the sign. Digits are '0'..'9', 'A'..'Z' and 'a'..'z'
  (letters stand for 10..35); parsing stops at the first character which
  is not a digit in the given base.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param l       Length of the string in bytes
  @param base    Radix
  @param endptr  If not NULL, receives the position where parsing stopped
  @param err     Receives 0 on success, or:
                 - EILSEQ if a bad multi-byte sequence was found
                 - EDOM   if the string ended before any digit, or no
                          character was consumed by the digit loop
                 - ERANGE if the magnitude does not fit into 64 bits
  @return        The converted value (negated in unsigned arithmetic if
                 the sign was negative), 0 on EILSEQ/EDOM,
                 all bits set on ERANGE

  NOTE(review): the digit loop advances "s" before checking whether the
  character is a valid digit, so *endptr points after the terminating
  non-digit character. Pre-existing behaviour, left unchanged.
*/
static ulonglong
my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
                        const char *nptr, size_t l, int base,
                        char **endptr, int *err)
{
  int      negative= 0;
  int      overflow;
  int      cnv;
  my_wc_t  wc;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
  register ulonglong    cutoff;
  register unsigned int cutlim;
  register ulonglong    res;
  register const uchar *s= (const uchar*) nptr;
  register const uchar *e= (const uchar*) nptr + l;
  const uchar *save;

  *err= 0;

  /* Skip leading spaces, tabs and signs */
  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      switch (wc)
      {
        case ' ' : break;
        case '\t': break;
        case '-' : negative= !negative; break;
        case '+' : break;
        default  : goto bs;
      }
    }
    else /* No more characters or bad multibyte sequence */
    {
      if (endptr !=NULL )
        *endptr = (char*)s;
      err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
      return 0;
    }
    s+=cnv;
  } while (1);

bs:

  overflow = 0;
  res = 0;
  save = s;
  /* res may be multiplied by base only while res <= cutoff */
  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);

  do
  {
    if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
    {
      s+=cnv;
      if ( wc>='0' && wc<='9')
        wc -= '0';
      else if ( wc>='A' && wc<='Z')
        wc = wc - 'A' + 10;
      else if ( wc>='a' && wc<='z')
        wc = wc - 'a' + 10;
      else
        break;
      if ((int)wc >= base)
        break;
      if (res > cutoff || (res == cutoff && wc > cutlim))
        overflow = 1;
      else
      {
        res *= (ulonglong) base;
        res += wc;
      }
    }
    else if (cnv==MY_CS_ILSEQ)
    {
      if (endptr !=NULL )
        *endptr = (char*)s;
      err[0]= EILSEQ;
      return 0;
    }
    else
    {
      /* No more characters */
      break;
    }
  } while(1);

  if (endptr != NULL)
    *endptr = (char *) s;

  if (s == save)
  {
    err[0]= EDOM;
    return 0L;
  }

  if (overflow)
  {
    err[0]= ERANGE;
    return (~(ulonglong) 0);
  }

  /*
    Negate in unsigned arithmetic: it gives the same bit pattern as
    -((longlong) res), but without a signed overflow (undefined
    behaviour) for res == 2^63.
  */
  return negative ? (ulonglong) 0 - res : res;
}
/*
  Convert a UCS2/UTF16/UTF32 string to double.

  The characters are narrowed into a local single-byte buffer, which is
  then parsed by my_strtod(). Copying stops at the first character that
  is 0 or greater than 'e', as it cannot be a part of a number.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param length  Length of the string in bytes; anything beyond
                 sizeof(buffer) - 1 bytes is ignored
  @param endptr  Receives the position in nptr where parsing stopped,
                 assuming cs->mbminlen bytes per parsed character
  @param err     Reset to 0, then set by my_strtod()
  @return        The value returned by my_strtod()
*/
static double
my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
                      char *nptr, size_t length,
                      char **endptr, int *err)
{
  char ascii[256];
  char *out= ascii;
  const uchar *cur= (const uchar *) nptr;
  const uchar *limit;
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  my_wc_t wc;
  int nbytes;
  double value;

  *err= 0;

  /* Cut too long strings */
  if (length >= sizeof(ascii))
    length= sizeof(ascii) - 1;
  limit= cur + length;

  for ( ; ; )
  {
    nbytes= decode(cs, &wc, cur, limit);
    if (nbytes <= 0)
      break;                                 /* End of data or bad sequence */
    cur+= nbytes;
    if (!wc || wc > (int) (uchar) 'e')
      break;                                 /* Can't be part of double */
    *out++= (char) wc;
  }

  /* my_strtod() takes the end of the input in *endptr */
  *endptr= out;
  value= my_strtod(ascii, endptr, err);

  /* Translate the offset in the narrow buffer back to an offset in nptr */
  *endptr= nptr + cs->mbminlen * (size_t) (*endptr - ascii);
  return value;
}
/*
  Convert a UCS2/UTF16/UTF32 string to ulonglong using
  my_strntoull10rnd_8bit().

  The characters are narrowed into a local single-byte buffer first.
  Copying stops at the first character that is 0 or greater than 'e',
  as it cannot be a part of a number.

  @param cs         Character set; cs->cset->mb_wc decodes the characters
  @param nptr       The string
  @param length     Length of the string in bytes; anything beyond
                    sizeof(buffer) - 1 bytes is ignored
  @param unsign_fl  Passed through to my_strntoull10rnd_8bit()
  @param endptr     Receives the position in nptr where parsing stopped,
                    assuming cs->mbminlen bytes per parsed character
  @param err        Set by my_strntoull10rnd_8bit()
  @return           The value returned by my_strntoull10rnd_8bit()
*/
static ulonglong
my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
                             const char *nptr, size_t length,
                             int unsign_fl,
                             char **endptr, int *err)
{
  char ascii[256];
  char *out= ascii;
  const uchar *cur= (const uchar *) nptr;
  const uchar *limit;
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  my_wc_t wc;
  int nbytes;
  ulonglong value;

  /* Cut too long strings */
  if (length >= sizeof(ascii))
    length= sizeof(ascii) - 1;
  limit= cur + length;

  for ( ; ; )
  {
    nbytes= decode(cs, &wc, cur, limit);
    if (nbytes <= 0)
      break;                                 /* End of data or bad sequence */
    cur+= nbytes;
    if (!wc || wc > (int) (uchar) 'e')
      break;                                 /* Can't be a number part */
    *out++= (char) wc;
  }

  value= my_strntoull10rnd_8bit(cs, ascii, out - ascii,
                                unsign_fl, endptr, err);

  /* Translate the offset in the narrow buffer back to an offset in nptr */
  *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - ascii);
  return value;
}
/*
  Print a long integer in decimal notation into a UCS2/UTF16/UTF32 string.
  This is a fast version optimized for the case of radix 10 / -10.

  @param cs     Character set; my_ci_wc_mb() encodes every character
  @param dst    The destination buffer
  @param len    Space available in dst, in bytes
  @param radix  Only the sign matters: if negative, val is treated
                as signed, otherwise as unsigned
  @param val    The value to print
  @return       Number of bytes written to dst. The output is silently
                truncated if dst is too short.
*/
static size_t
my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
                       char *dst, size_t len, int radix, long int val)
{
  char digits[66];
  char *cur= digits + sizeof(digits) - 1;
  char *out= dst;
  char *out_end= dst + len;
  unsigned long int magnitude= (unsigned long int) val;
  int is_negative= (radix < 0 && val < 0);

  *cur= '\0';

  /* Negate in unsigned arithmetic to avoid overflow in (-val) for LONG_MIN */
  if (is_negative)
    magnitude= (unsigned long int) 0 - magnitude;

  /* Produce the digits right-to-left; at least one digit is always made */
  do
  {
    *--cur= '0' + (char) (magnitude % 10);
    magnitude/= 10;
  } while (magnitude != 0);

  if (is_negative)
    *--cur= '-';

  /* Encode the narrow digits into the destination character set */
  while (out < out_end && *cur)
  {
    int cnvres= my_ci_wc_mb(cs, (my_wc_t) cur[0],
                            (uchar *) out, (uchar *) out_end);
    if (cnvres <= 0)
      break;                                 /* No more room */
    out+= cnvres;
    cur++;
  }
  return (int) (out - dst);
}
/*
  Print a longlong integer in decimal notation into
  a UCS2/UTF16/UTF32 string.

  @param cs     Character set; my_ci_wc_mb() encodes every character
  @param dst    The destination buffer
  @param len    Space available in dst, in bytes
  @param radix  Only the sign matters: if negative, val is treated
                as signed, otherwise as unsigned
  @param val    The value to print
  @return       Number of bytes written to dst. The output is silently
                truncated if dst is too short.
*/
static size_t
my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
                        char *dst, size_t len, int radix, longlong val)
{
  char digits[65];
  char *cur= digits + sizeof(digits) - 1;
  char *out= dst;
  char *out_end= dst + len;
  ulonglong magnitude= (ulonglong) val;
  int is_negative= (radix < 0 && val < 0);

  *cur= '\0';

  /* Negate in unsigned arithmetic to avoid overflow in (-val) for LONGLONG_MIN */
  if (is_negative)
    magnitude= (ulonglong) 0 - magnitude;

  /* Produce the digits right-to-left; zero gives a single '0' */
  do
  {
    *--cur= (char) ('0' + (magnitude % 10));
    magnitude/= 10;
  } while (magnitude != 0);

  if (is_negative)
    *--cur= '-';

  /* Encode the narrow digits into the destination character set */
  while (out < out_end && *cur)
  {
    int cnvres= my_ci_wc_mb(cs, (my_wc_t) cur[0],
                            (uchar *) out, (uchar *) out_end);
    if (cnvres <= 0)
      break;                                 /* No more room */
    out+= cnvres;
    cur++;
  }
  return (int) (out - dst);
}
#endif /* HAVE_CHARSET_mb2_or_mb4 */
#ifdef HAVE_CHARSET_mb2
/**
  Convert a Unicode code point to a decimal digit value.

  @param[OUT] c  - receives the digit value 0..9 if wc is a digit.
                   It is also overwritten (with a value greater than 9)
                   when wc is below '0'; it is left untouched when wc
                   is above '9'.
  @param wc      - the input Unicode code point
  @return     0  - if wc is a good digit
  @return     1  - if wc is not a digit
*/
static inline my_bool
wc2digit_uchar(uchar *c, my_wc_t wc)
{
  if (wc > '9')
    return 1;
  /* For wc < '0' the subtraction wraps around and gives a value > 9 */
  c[0]= (uchar) (wc - '0');
  return c[0] > 9;
}
/*
  Convert a decimal number in a UCS2/UTF16 string to longlong.

  The digits are accumulated in three chunks: up to 9 digits in "i",
  up to 9 more digits in "j" and up to 2 more digits in "k". Leading
  spaces and tabs are skipped, then one optional '-' or '+' sign.

  @param cs      Character set; cs->cset->mb_wc decodes the characters
  @param nptr    The string
  @param endptr  IN:  end of the string (NUL-terminated strings are not
                      supported, so NULL results in "no conversion")
                 OUT: the position where parsing stopped, or nptr if
                      there was nothing to convert
  @param error   Receives:
                 -  0               a non-negative number was parsed
                 - -1               a negative number was parsed
                 - MY_ERRNO_ERANGE  overflow
                 - MY_ERRNO_EDOM    there was no number to convert
  @return        The converted value. On overflow LONGLONG_MIN for
                 a negative number and (longlong) ULONGLONG_MAX otherwise.
                 0 if there was no number to convert.

  Fixes relative to the previous version:
  - The 20-digit overflow check compares (i, j, k) with the cut-offs
    lexicographically. Previously "j > cutoff2" combined with
    "k <= cutoff3" was not detected and the result wrapped around.
  - The "one digit too many" check no longer truncates the character to
    uchar, so a non-digit like U+0130 is not taken for a digit.
  - endptr is not dereferenced in no_conv when it is NULL.
*/
static longlong
my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
                 const char *nptr, char **endptr, int *error)
{
  const uchar *s, *end, *start, *n_end, *true_end;
  uchar UNINIT_VAR(c);
  unsigned long i, j, k;
  ulonglong li;
  int negative;
  ulong cutoff, cutoff2, cutoff3;
  my_wc_t wc;
  int res;
  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;

  s= (const uchar *) nptr;
  /* If fixed length string */
  if (endptr)
  {
    /*
      Make sure string length is even.
      Odd length indicates a bug in the caller.
      Assert in debug, round in production.
    */
    DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0);
    end= s + ((*endptr - (const char*) s) / 2) * 2;

    for ( ; ; ) /* Skip leading spaces and tabs */
    {
      if ((res= mb_wc(cs, &wc, s, end)) <= 0)
        goto no_conv;
      s+= res;
      if (wc != ' ' && wc != '\t')
        break;
    }
  }
  else
  {
    /* We don't support null terminated strings in UCS2 */
    goto no_conv;
  }

  /* Check for a sign. */
  negative= 0;
  if (wc == '-')
  {
    *error= -1;                                 /* Mark as negative number */
    negative= 1;
    if ((res= mb_wc(cs, &wc, s, end)) <= 0)
      goto no_conv;
    s+= res; /* wc is now expected to hold the first digit. */
    cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
    cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
    cutoff3=  MAX_NEGATIVE_NUMBER % 100;
  }
  else
  {
    *error= 0;
    if (wc == '+')
    {
      if ((res= mb_wc(cs, &wc, s, end)) <= 0)
        goto no_conv;
      s+= res; /* wc is now expected to hold the first digit. */
    }
    cutoff=  ULONGLONG_MAX / LFACTOR2;
    cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
    cutoff3= ULONGLONG_MAX % 100;
  }

  /*
    The code below assumes that 'wc' holds the first digit
    and 's' points to the next character after it.

    Scan pre-zeros if any.
  */
  if (wc == '0')
  {
    i= 0;
    for ( ; ; s+= res)
    {
      if (s == end)
        goto end_i;                             /* Return 0 */
      if ((res= mb_wc(cs, &wc, s, end)) <= 0)
        goto no_conv;
      if (wc != '0')
        break;
    }
    n_end= s + 2 * INIT_CNT;
  }
  else
  {
    /* Read first digit to check that it's a valid number */
    if ((i= (wc - '0')) > 9)
      goto no_conv;
    n_end= s + 2 * (INIT_CNT-1);
  }

  /* Handle first 9 digits and store them in i */
  if (n_end > end)
    n_end= end;
  for ( ; ; s+= res)
  {
    if ((res= mb_wc(cs, &wc, s, n_end)) <= 0)
      break;
    if (wc2digit_uchar(&c, wc))
      goto end_i;
    i= i*10+c;
  }
  if (s == end)
    goto end_i;

  /* Handle next 9 digits and store them in j */
  j= 0;
  start= s;                             /* Used to know how much to shift i */
  n_end= true_end= s + 2 * INIT_CNT;
  if (n_end > end)
    n_end= end;
  do
  {
    if ((res= mb_wc(cs, &wc, s, end)) <= 0)
      goto no_conv;
    if (wc2digit_uchar(&c, wc))
      goto end_i_and_j;
    s+= res;
    j= j * 10 + c;
  } while (s != n_end);
  if (s == end)
  {
    if (s != true_end)
      goto end_i_and_j;
    goto end3;
  }

  /* Handle the next 1 or 2 digits and store them in k */
  if ((res= mb_wc(cs, &wc, s, end)) <= 0)
    goto no_conv;
  if ((k= (wc - '0')) > 9)
    goto end3;
  s+= res;

  if (s == end)
    goto end4;
  if ((res= mb_wc(cs, &wc, s, end)) <= 0)
    goto no_conv;
  if (wc2digit_uchar(&c, wc))
    goto end4;
  s+= res;
  k= k*10+c;
  *endptr= (char*) s;

  /*
    Number string should have ended here.
    Compare the full code point: truncating it to uchar would take
    e.g. U+0130 for the digit '0'.
  */
  if (s != end && mb_wc(cs, &wc, s, end) > 0 && (wc - '0') <= 9)
    goto overflow;

  /*
    Check that we didn't get an overflow with the last digit.
    (i, j, k) is compared with (cutoff, cutoff2, cutoff3)
    lexicographically.
  */
  if (i > cutoff ||
      (i == cutoff && (j > cutoff2 || (j == cutoff2 && k > cutoff3))))
    goto overflow;
  li=i*LFACTOR2+ (ulonglong) j*100 + k;
  return (longlong) li;

overflow:                                       /* *endptr is set here */
  *error= MY_ERRNO_ERANGE;
  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;

end_i:
  *endptr= (char*) s;
  return (negative ? ((longlong) -(long) i) : (longlong) i);

end_i_and_j:
  /* Shift i by the number of digits which were read into j */
  li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
  *endptr= (char*) s;
  return (negative ? -((longlong) li) : (longlong) li);

end3:
  li=(ulonglong) i*LFACTOR+ (ulonglong) j;
  *endptr= (char*) s;
  return (negative ? -((longlong) li) : (longlong) li);

end4:
  li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
  *endptr= (char*) s;
  if (negative)
  {
   if (li > MAX_NEGATIVE_NUMBER)
     goto overflow;
   return -((longlong) li);
  }
  return (longlong) li;

no_conv:
  /* There was no number to convert.  */
  *error= MY_ERRNO_EDOM;
  if (endptr)                         /* NULL when called without a length */
    *endptr= (char *) nptr;
  return 0;
}
/*
  Scan a sequence of characters of the given type.

  @param cs             Character set; cs->cset->mb_wc decodes the
                        characters
  @param str            Beginning of the string
  @param end            End of the string
  @param sequence_type  MY_SEQ_SPACES    - measure the leading run of
                                           ' ' characters
                        MY_SEQ_NONSPACES - not implemented: asserts in
                                           debug builds
  @return               Length of the leading run of spaces in bytes
                        for MY_SEQ_SPACES, 0 for any other type
*/
static size_t
my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
            const char *str, const char *end, int sequence_type)
{
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  const char *cur= str;
  my_wc_t wc;
  int nbytes;

  if (sequence_type == MY_SEQ_SPACES)
  {
    /* Advance while a character can be decoded and it is a space */
    while ((nbytes= decode(cs, &wc,
                           (const uchar *) cur, (const uchar *) end)) > 0 &&
           wc == ' ')
      cur+= nbytes;
    return (size_t) (cur - str);
  }

  if (sequence_type == MY_SEQ_NONSPACES)
  {
    DBUG_ASSERT(0); /* Not implemented */
  }
  return 0;
}
/*
  Fill a buffer with a repeated character.

  @param cs    Character set; my_ci_wc_mb() encodes the fill character
  @param s     The buffer to fill
  @param slen  Length of the buffer in bytes (expected to be even)
  @param fill  The code of the character to fill with

  The encoded character is repeated as many times as it fits entirely.
  Trailing bytes which cannot hold a whole character are set to 0x00.
*/
static void
my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill)
{
  char encoded[10];
  char *last_start;
  size_t encoded_length, tail_length;

  DBUG_ASSERT((slen % 2) == 0);

  encoded_length= my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) encoded,
                              (uchar*) encoded + sizeof(encoded));
  DBUG_ASSERT(encoded_length > 0);

  /* The last position where a whole encoded character can start */
  last_start= s + slen - encoded_length;

  while (s <= last_start)
  {
    /* Enough space for the character */
    memcpy(s, encoded, encoded_length);
    s+= encoded_length;
  }

  /* Zero the trailing bytes which are too short for a whole character */
  tail_length= last_start + encoded_length - s;
  if (tail_length > 0)
    bzero(s, (size_t) tail_length);
}
/*
  A minimal vsnprintf() producing 2-byte big-endian characters:
  every output character is written as a 0x00 byte followed by
  the character byte.

  Supported conversions: %s, %d, %u (an optional 'l' modifier and any
  width/precision characters are skipped), and %% for a literal '%'.
  An unknown conversion prints '%'.

  @param dst  The destination buffer
  @param n    Size of the destination buffer in bytes, including the
              terminating '\0' byte
  @param fmt  The format string (single-byte)
  @param ap   Arguments for the conversions
  @return     Number of bytes written, not counting the terminating '\0'.
              0 if n is 0 (nothing is written in this case).

  The output is truncated if it does not fit. Fixes relative to the
  previous version:
  - a 2-byte character is written only if 2 bytes are available, so the
    terminating '\0' never lands behind the buffer;
  - %s with less than 2 bytes left does not underflow the copy length;
  - a format string ending inside a conversion (e.g. "abc%") does not
    make the loop step behind the terminating '\0' of the format;
  - n == 0 is handled.
*/
static size_t
my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
{
  char *start=dst, *end;

  if (!n)
    return 0;                           /* No room even for the '\0' */
  end= dst + n - 1;                     /* The last byte is for the '\0' */

  for (; *fmt ; fmt++)
  {
    if (fmt[0] != '%')
    {
      if (end - dst < 2)                /* End of buffer */
        break;

      *dst++='\0';
      *dst++= *fmt;                     /* Copy ordinary char */
      continue;
    }

    fmt++;

    /* Skip if max size is used (to be compatible with printf) */
    while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
      fmt++;

    if (*fmt == 'l')
      fmt++;

    if (*fmt == 's')                    /* String parameter */
    {
      char *par= va_arg(ap, char *);
      size_t plen;
      size_t left_len= (size_t)(end-dst);
      if (!par)
        par= (char*) "(null)";
      plen= strlen(par);
      /* Truncate the parameter if it does not fit */
      if (left_len <= plen * 2)
        plen= left_len >= 2 ? left_len / 2 - 1 : 0;

      for ( ; plen ; plen--, dst+=2, par++)
      {
        dst[0]= '\0';
        dst[1]= par[0];
      }
      continue;
    }
    else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
    {
      int iarg;
      char nbuf[16];
      char *pbuf= nbuf;

      if ((size_t) (end - dst) < 32)
        break;
      iarg= va_arg(ap, int);
      if (*fmt == 'd')
        int10_to_str((long) iarg, nbuf, -10);
      else
        int10_to_str((long) (uint) iarg, nbuf,10);

      for (; pbuf[0]; pbuf++)
      {
        *dst++= '\0';
        *dst++= *pbuf;
      }
      continue;
    }

    /* We come here on '%%', unknown code or a format ending with '%' */
    if (end - dst < 2)
      break;
    *dst++= '\0';
    *dst++= '%';                        /* % used as % or unknown code */

    if (!*fmt)                          /* Don't step behind the format end */
      break;
  }

  DBUG_ASSERT(dst <= end);
  *dst='\0';                            /* End of errmessage */
  return (size_t) (dst - start);
}
/*
  A variadic wrapper around my_vsnprintf_mb2().

  @param cs   Character set (unused)
  @param to   The destination buffer
  @param n    Size of the destination buffer in bytes
  @param fmt  The format string
  @return     The value returned by my_vsnprintf_mb2()
*/
static size_t
my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
                char* to, size_t n, const char* fmt, ...)
{
  va_list ap;
  size_t written;

  va_start(ap, fmt);
  written= my_vsnprintf_mb2(to, n, fmt, ap);
  va_end(ap);

  return written;
}
/*
  Return the length of a string without its trailing spaces.

  A trailing space is the two-byte sequence 0x00 0x20.

  @param cs      Character set (unused)
  @param ptr     The string
  @param length  Length of the string in bytes
  @return        Length in bytes after removing all trailing spaces
*/
static size_t
my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
                const char *ptr, size_t length)
{
  size_t kept= length;

  /* Drop one 2-byte space per iteration */
  while (kept >= 2 && ptr[kept - 1] == ' ' && ptr[kept - 2] == '\0')
    kept-= 2;

  return kept;
}
#endif /* HAVE_CHARSET_mb2*/
/*
Next part is actually HAVE_CHARSET_utf16-specific,
but the JSON functions needed my_utf16_uni()
so the #ifdef was moved lower.
*/
#include "ctype-utf16.h"
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
/*
  Return the general_ci sort weight of a 2-byte UTF16 character.

  @param b0  The first byte of the character
  @param b1  The second byte of the character
  @return    The "sort" value from my_unicase_default_pages if there is
             a page for the character, otherwise the code point itself
*/
static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
{
  my_wc_t code= MY_UTF16_WC2(b0, b1);
  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[code >> 8];

  if (!page)
    return (int) code;              /* No case data for this page */
  return (int) page[code & 0xFF].sort;
}
/*
  Collation handler instantiation.

  Each section below defines a set of parameter macros and then includes
  "strcoll.inl". Presumably the template generates functions whose names
  are built with MY_FUNCTION_NAME() and undefines its parameter macros at
  the end, because they are re-defined by the next section without an
  #undef here - TODO confirm in strcoll.inl.
*/

/* utf16_general_ci: case insensitive weights for 2-byte characters */
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf16_quick(pwc, s, e)
#define OPTIMIZE_ASCII 0
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 my_unicase_default_page00
#define UNICASE_PAGES my_unicase_default_pages
/* A bad byte gets a weight of 0xFF0000 plus the byte value */
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
/* All 4-byte characters get the weight of the replacement character */
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
/* utf16_bin: the weight of a character is its code point */
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.inl"
/* utf16_general_nopad_ci: same weights as utf16_general_ci, NOPAD variant */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_nopad_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
/* utf16_nopad_bin: same weights as utf16_bin, NOPAD variant */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_nopad_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.inl"
/* The character classification macros are not needed any more */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
/*
These two functions are used in JSON library, so made exportable
and unconditionally compiled into the library.
*/
/*
  Decode one UTF16 character into a Unicode code point.
  A wrapper around my_mb_wc_utf16_quick() with the standard
  "mb_wc" handler signature.

  @param cs   Character set (unused)
  @param pwc  Receives the code point
  @param s    Beginning of the input
  @param e    End of the input
  @return     The value returned by my_mb_wc_utf16_quick()
*/
/*static*/ int
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t *pwc, const uchar *s, const uchar *e)
{
  int consumed= my_mb_wc_utf16_quick(pwc, s, e);
  return consumed;
}
/*
  Encode a Unicode code point as UTF16 (big-endian byte order).

  @param cs  Character set (unused)
  @param wc  The code point
  @param s   Beginning of the output buffer
  @param e   End of the output buffer
  @return    2 or 4   - the number of bytes written
  @return    MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 - less than 2 / 4 bytes
                        are available in the output buffer
  @return    MY_CS_ILUNI - wc is a surrogate or is above 0x10FFFF
*/
/*static*/ int
my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t wc, uchar *s, uchar *e)
{
  my_wc_t offset;

  if (wc > 0x10FFFF)
    return MY_CS_ILUNI;                 /* Outside of the Unicode range */

  if (wc <= 0xFFFF)
  {
    /* One 16-bit unit */
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    if (MY_UTF16_SURROGATE(wc))
      return MY_CS_ILUNI;
    s[0]= (uchar) (wc >> 8);
    s[1]= (uchar) (wc & 0xFF);
    return 2;
  }

  /* A surrogate pair: 10 + 10 bits of (wc - 0x10000) */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;
  offset= wc - 0x10000;
  s[0]= (uchar) (offset >> 18) | 0xD8;
  s[1]= (uchar) (offset >> 10) & 0xFF;
  s[2]= (uchar) ((offset >> 8) & 3) | 0xDC;
  s[3]= (uchar) offset & 0xFF;
  return 4;
}
#ifdef HAVE_CHARSET_utf16
const char charset_name_utf16le[]= "utf16le";
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
/*
  Replace *wc with its lower-case mapping.
  *wc is left untouched when it is above uni_plane->maxchar or when the
  case table has no page for it.
*/
static inline void
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page= uni_plane->page[*wc >> 8];
    if (page)
      *wc= page[*wc & 0xFF].tolower;
  }
}
/*
  Replace *wc with its upper-case mapping.
  *wc is left untouched when it is above uni_plane->maxchar or when the
  case table has no page for it.
*/
static inline void
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page= uni_plane->page[*wc >> 8];
    if (page)
      *wc= page[*wc & 0xFF].toupper;
  }
}
/*
  Replace *wc with its sort weight.
  Characters above uni_plane->maxchar become MY_CS_REPLACEMENT_CHARACTER;
  characters whose page is missing from the table are left unchanged.
*/
static inline void
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;

  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }
  page= uni_plane->page[*wc >> 8];
  if (page)
    *wc= page[*wc & 0xFF].sort;
}
/*
  Convert src to upper case into dst, one character at a time, using the
  mb_wc/wc_mb functions of cs.

  Conversion stops at the first character that cannot be decoded, or whose
  upper-case form does not encode into the same number of bytes.

  @return srclen (always)
*/
static size_t
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
                char *dst, size_t dstlen)
{
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  my_charset_conv_wc_mb encode= cs->cset->wc_mb;
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  my_wc_t ch;
  int nbytes;

  DBUG_ASSERT(srclen <= dstlen);

  for ( ; src < src_end; src+= nbytes, dst+= nbytes)
  {
    nbytes= decode(cs, &ch, (uchar *) src, (uchar *) src_end);
    if (nbytes <= 0)
      break;
    my_toupper_utf16(casetab, &ch);
    if (encode(cs, ch, (uchar *) dst, (uchar *) dst_end) != nbytes)
      break;
  }
  return srclen;
}
/*
  Fold the sort weight of every character of s[0..slen) into the hash
  state *nr1 / *nr2. Trailing spaces are NOT removed.
  Hashing stops at the first sequence that cs->cset->mb_wc cannot decode.
*/
static void
my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
                         const uchar *s, size_t slen,
                         ulong *nr1, ulong *nr2)
{
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const uchar *end= s + slen;
  register ulong h1= *nr1, h2= *nr2;
  my_wc_t ch;
  int nbytes;

  for ( ; s < end; s+= nbytes)
  {
    nbytes= decode(cs, &ch, (uchar *) s, (uchar *) end);
    if (nbytes <= 0)
      break;
    my_tosort_utf16(casetab, &ch);
    MY_HASH_ADD_16(h1, h2, ch);
  }
  *nr1= h1;
  *nr2= h2;
}
/*
  PAD SPACE hash: hash only the first my_ci_lengthsp() bytes of s,
  then delegate to my_hash_sort_utf16_nopad().
*/
static void
my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
                   ulong *nr1, ulong *nr2)
{
  my_hash_sort_utf16_nopad(cs, s,
                           my_ci_lengthsp(cs, (const char *) s, slen),
                           nr1, nr2);
}
/*
  Convert src to lower case into dst, one character at a time, using the
  mb_wc/wc_mb functions of cs.

  Conversion stops at the first character that cannot be decoded, or whose
  lower-case form does not encode into the same number of bytes.

  @return srclen (always)
*/
static size_t
my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
                char *dst, size_t dstlen)
{
  my_charset_conv_mb_wc decode= cs->cset->mb_wc;
  my_charset_conv_wc_mb encode= cs->cset->wc_mb;
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  my_wc_t ch;
  int nbytes;

  DBUG_ASSERT(srclen <= dstlen);

  for ( ; src < src_end; src+= nbytes, dst+= nbytes)
  {
    nbytes= decode(cs, &ch, (uchar *) src, (uchar *) src_end);
    if (nbytes <= 0)
      break;
    my_tolower_utf16(casetab, &ch);
    if (encode(cs, ch, (uchar *) dst, (uchar *) dst_end) != nbytes)
      break;
  }
  return srclen;
}
/*
  Length of the character starting at str: the return value of
  my_ci_mb_wc() for it. The decoded code point itself is discarded.
*/
static int
my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
{
  my_wc_t ignored;
  int len= my_ci_mb_wc(cs, &ignored, str, end);
  return len;
}
/*
  Instantiate ctype-mb.inl for utf16, with my_charlen_utf16() as the
  character length primitive. The parameter macros are removed again
  right after the include.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16
#define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.inl"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* The include above defines my_well_formed_char_length_utf16 */
/*
  Count the characters in [b, e).
  Counting stops as soon as my_ismbchar() returns 0 for the current position.
*/
static size_t
my_numchars_utf16(CHARSET_INFO *cs,
                  const char *b, const char *e)
{
  size_t count= 0;
  size_t len;

  while ((len= my_ismbchar(cs, b, e)) != 0)
  {
    b+= len;
    count++;
  }
  return count;
}
/*
  Byte offset of the character number pos inside [b, e).

  @return offset from b of that character, or (e + 2 - b), i.e. a position
          outside the string, when my_ismbchar() returns 0 before pos
          characters have been skipped.
*/
static size_t
my_charpos_utf16(CHARSET_INFO *cs,
                 const char *b, const char *e, size_t pos)
{
  const char *cur= b;

  while (pos)
  {
    uint len= my_ismbchar(cs, cur, e);
    if (!len)
      return (e + 2 - b);        /* Error, return pos outside the string */
    cur+= len;
    pos--;
  }
  /* All pos characters were skipped */
  return (size_t) (cur - b);
}
/*
  Wildcard (LIKE) comparison for case-insensitive utf16 collations:
  my_wildcmp_unicode() with the case table of cs.
*/
static int
my_wildcmp_utf16_ci(CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, cs->caseinfo);
}
/*
  Wildcard (LIKE) comparison for binary utf16 collations:
  my_wildcmp_unicode() without a case table (NULL).
*/
static int
my_wildcmp_utf16_bin(CHARSET_INFO *cs,
                     const char *str,const char *str_end,
                     const char *wildstr,const char *wildend,
                     int escape, int w_one, int w_many)
{
  int rc= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                             escape, w_one, w_many, NULL);
  return rc;
}
/*
  Binary hash: fold every byte of pos[0..len) into *nr1 / *nr2.
  Trailing spaces are NOT removed.
*/
static void
my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
                             const uchar *pos, size_t len,
                             ulong *nr1, ulong *nr2)
{
  const uchar *stop= pos + len;
  register ulong h1= *nr1, h2= *nr2;

  while (pos < stop)
  {
    MY_HASH_ADD(h1, h2, (uint)*pos);
    pos++;
  }
  *nr1= h1;
  *nr2= h2;
}
/*
  PAD SPACE binary hash: hash only the first my_ci_lengthsp() bytes of pos,
  then delegate to my_hash_sort_utf16_nopad_bin().
*/
static void
my_hash_sort_utf16_bin(CHARSET_INFO *cs,
                       const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
{
  my_hash_sort_utf16_nopad_bin(cs, pos,
                               my_ci_lengthsp(cs, (const char *) pos, len),
                               nr1, nr2);
}
/*
  Collation handler table for utf16_general_ci (used by
  my_charset_utf16_general_ci). The initializer is positional: the slot
  order is fixed by the declaration of MY_COLLATION_HANDLER.
  Hashing goes through my_hash_sort_utf16, i.e. without trailing spaces.
*/
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
{
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_ci,
my_strnncollsp_nchars_utf16_general_ci,
my_strnxfrm_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16_bin (used by my_charset_utf16_bin).
  The initializer is positional: the slot order is fixed by the declaration
  of MY_COLLATION_HANDLER.
  Hashing goes through my_hash_sort_utf16_bin, i.e. without trailing spaces.
*/
static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
{
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_bin,
my_strnncollsp_nchars_utf16_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_bin,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16_general_nopad_ci (used by
  my_charset_utf16_general_nopad_ci). The initializer is positional: the
  slot order is fixed by the declaration of MY_COLLATION_HANDLER.
  Compared with the PAD SPACE table it plugs in the *_nopad_* variants of
  strnncollsp, strnncollsp_nchars, strnxfrm, hash_sort and min_str.
*/
static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
{
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_nopad_ci,
my_strnncollsp_nchars_utf16_general_nopad_ci,
my_strnxfrm_nopad_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16_nopad_bin (used by
  my_charset_utf16_nopad_bin). The initializer is positional: the slot
  order is fixed by the declaration of MY_COLLATION_HANDLER.
  Compared with the PAD SPACE table it plugs in the *_nopad_* variants of
  strnncollsp, strnncollsp_nchars, strnxfrm, hash_sort and min_str.
*/
static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
{
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_nopad_bin,
my_strnncollsp_nchars_utf16_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad_bin,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Character set handler table shared by all utf16 collations in this file.
  Not static, so it is visible to other translation units.
  The initializer is positional: the slot order is fixed by the declaration
  of MY_CHARSET_HANDLER. Several slots reuse the generic *_mb2 /
  *_mb2_or_mb4 helpers.
*/
MY_CHARSET_HANDLER my_charset_utf16_handler=
{
NULL, /* init */
my_numchars_utf16,
my_charpos_utf16,
my_lengthsp_mb2,
my_numcells_mb,
my_utf16_uni, /* mb_wc */
my_uni_utf16, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb2_or_mb4,
my_casedn_str_mb2_or_mb4,
my_caseup_utf16,
my_casedn_utf16,
my_snprintf_mb2,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_mb2,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
my_strntoull_mb2_or_mb4,
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16, /* NOTE(review): same function as the wc_mb slot above */
my_wc_to_printable_generic
};
/*
  utf16_general_ci: collation number 54, flagged MY_CS_PRIMARY,
  pad character ' ', 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16_general_ci=
{
54,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
{ STRING_WITH_LEN("utf16_general_ci") }, /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16_handler, /* charset handler */
&my_collation_utf16_general_ci_handler /* collation handler */
};
/*
  utf16_bin: collation number 55, flagged MY_CS_BINSORT,
  pad character ' ', 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16_bin=
{
55,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
{ STRING_WITH_LEN("utf16_bin") }, /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16_handler, /* charset handler */
&my_collation_utf16_bin_handler /* collation handler */
};
/*
  utf16_general_nopad_ci: collation number MY_NOPAD_ID(54),
  flagged MY_CS_NOPAD, 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16_general_nopad_ci=
{
MY_NOPAD_ID(54),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
{ charset_name_utf16, charset_name_utf16_length }, /* cs name */
{ STRING_WITH_LEN("utf16_general_nopad_ci") }, /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16_handler, /* charset handler */
&my_collation_utf16_general_nopad_ci_handler /* collation handler */
};
/*
  utf16_nopad_bin: collation number MY_NOPAD_ID(55),
  flagged MY_CS_BINSORT and MY_CS_NOPAD, 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16_nopad_bin=
{
MY_NOPAD_ID(55),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
{ charset_name_utf16, charset_name_utf16_length}, /* cs name */
{ STRING_WITH_LEN("utf16_nopad_bin") }, /* name */
"UTF-16 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16_handler, /* charset handler */
&my_collation_utf16_nopad_bin_handler /* collation handler */
};
/*
  Character recognition for utf16le: the surrogate tests look at the
  second byte of each 16-bit unit (b1, b3).
*/
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b1))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
/*
  Instantiate strcoll.inl for utf16le_general_ci.
  The bytes of each unit are passed to the weight helper in swapped order
  (b1,b0). Every 4-byte character gets the weight
  MY_CS_REPLACEMENT_CHARACTER.
  NOTE(review): the parameter macros are re-#defined before every #include,
  so strcoll.inl presumably #undef's them itself -- confirm.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e) (my_ci_mb_wc(cs, pwc, s, e))
#define OPTIMIZE_ASCII 0
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 my_unicase_default_page00
#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
/*
  Instantiate strcoll.inl for utf16le_bin: the weight is the value produced
  by MY_UTF16_WC2 / MY_UTF16_WC4, with the bytes of every unit swapped.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.inl"
/* Instantiate strcoll.inl for utf16le_general_nopad_ci */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_nopad_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
/* Instantiate strcoll.inl for utf16le_nopad_bin */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_nopad_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.inl"
/* The utf16le character-recognition macros are not needed past this point */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
/*
  mb_wc for utf16le: decode one character starting at s.

  @return 2 or 4 - number of bytes consumed, *pwc holds the code point
  @return MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 - the input ends too early
  @return MY_CS_ILSEQ - a low surrogate without a preceding high surrogate,
                        or a high surrogate not followed by a low one
*/
static int
my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)),
               my_wc_t *pwc, const uchar *s, const uchar *e)
{
  my_wc_t first, second;

  if (s + 2 > e)
    return MY_CS_TOOSMALL2;

  first= uint2korr(s);
  *pwc= first;

  if (first < MY_UTF16_SURROGATE_HIGH_FIRST ||
      first > MY_UTF16_SURROGATE_LOW_LAST)
    return 2;                     /* [0000-D7FF,E000-FFFF] */

  if (first >= MY_UTF16_SURROGATE_LOW_FIRST)
    return MY_CS_ILSEQ;           /* [DC00-DFFF] Low part without high part */

  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  second= uint2korr(s + 2);
  if (second < MY_UTF16_SURROGATE_LOW_FIRST ||
      second > MY_UTF16_SURROGATE_LOW_LAST)
    return MY_CS_ILSEQ;           /* Expected a low surrogate part */

  *pwc= 0x10000 + (((first & 0x3FF) << 10) | (second & 0x3FF));
  return 4;
}
/*
  wc_mb for utf16le: encode code point wc at s.

  @return 2 or 4 - number of bytes written
  @return MY_CS_TOOSMALL2 / MY_CS_TOOSMALL4 - not enough room before e
  @return MY_CS_ILUNI - wc is a surrogate or is greater than 0x10FFFF
*/
static int
my_uni_utf16le(CHARSET_INFO *cs __attribute__((unused)),
               my_wc_t wc, uchar *s, uchar *e)
{
  uint32 high_unit, low_unit, both_units;
  const int single_unit= wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
                         (wc > MY_UTF16_SURROGATE_LOW_LAST &&
                          wc <= 0xFFFF);

  if (single_unit)
  {
    /* [0000-D7FF,E000-FFFF] */
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    int2store(s, wc);
    return 2;
  }

  /* [D800-DFFF,10FFFF+] */
  if (wc < 0xFFFF || wc > 0x10FFFF)
    return MY_CS_ILUNI;

  /* [010000-10FFFF] */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  wc-= 0x10000;
  high_unit= (0xD800 | ((wc >> 10) & 0x3FF));
  low_unit= (0xDC00 | (wc & 0x3FF));
  both_units= high_unit | (low_unit << 16);
  int4store(s, both_units);
  return 4;
}
/*
  Length of ptr[0..length) in bytes after removing trailing 16-bit units
  for which uint2korr() yields ' '.
*/
static size_t
my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)),
                    const char *ptr, size_t length)
{
  size_t remaining= length;

  while (remaining >= 2 && uint2korr(ptr + remaining - 2) == ' ')
    remaining-= 2;
  return remaining;
}
/*
  Collation handler table for utf16le_general_ci (used by
  my_charset_utf16le_general_ci). The initializer is positional: the slot
  order is fixed by the declaration of MY_COLLATION_HANDLER.
  The wildcmp and hash_sort entries are the same functions as in the utf16
  tables; those functions decode through the mb_wc function of the charset
  they are called with.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
{
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_ci,
my_strnncollsp_nchars_utf16le_general_ci,
my_strnxfrm_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16le_bin (used by my_charset_utf16le_bin).
  The initializer is positional: the slot order is fixed by the declaration
  of MY_COLLATION_HANDLER.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
{
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_bin,
my_strnncollsp_nchars_utf16le_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_bin,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16le_general_nopad_ci (used by
  my_charset_utf16le_general_nopad_ci). The initializer is positional: the
  slot order is fixed by the declaration of MY_COLLATION_HANDLER.
  Compared with the PAD SPACE table it plugs in the *_nopad_* variants of
  strnncollsp, strnncollsp_nchars, strnxfrm, hash_sort and min_str.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
{
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_nopad_ci,
my_strnncollsp_nchars_utf16le_general_nopad_ci,
my_strnxfrm_nopad_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf16le_nopad_bin (used by
  my_charset_utf16le_nopad_bin). The initializer is positional: the slot
  order is fixed by the declaration of MY_COLLATION_HANDLER.
  Compared with the PAD SPACE table it plugs in the *_nopad_* variants of
  strnncollsp, strnncollsp_nchars, strnxfrm, hash_sort and min_str.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
{
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_nopad_bin,
my_strnncollsp_nchars_utf16le_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad_bin,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Character set handler table shared by all utf16le collations in this file.
  The initializer is positional: the slot order is fixed by the declaration
  of MY_CHARSET_HANDLER.
  It differs from my_charset_utf16_handler in the lengthsp entry
  (my_lengthsp_utf16le) and in the conversion functions
  (my_utf16le_uni / my_uni_utf16le).
*/
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
NULL, /* init */
my_numchars_utf16,
my_charpos_utf16,
my_lengthsp_utf16le,
my_numcells_mb,
my_utf16le_uni, /* mb_wc */
my_uni_utf16le, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb2_or_mb4,
my_casedn_str_mb2_or_mb4,
my_caseup_utf16,
my_casedn_utf16,
my_snprintf_mb2,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_mb2,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
my_strntoull_mb2_or_mb4,
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16le, /* NOTE(review): same function as the wc_mb slot above */
my_wc_to_printable_generic
};
/*
  utf16le_general_ci: collation number 56, flagged MY_CS_PRIMARY,
  pad character ' ', 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16le_general_ci=
{
56,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf16le, charset_name_utf16le_length }, /* cs name */
{ STRING_WITH_LEN("utf16le_general_ci") },/* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16le_handler, /* charset handler */
&my_collation_utf16le_general_ci_handler /* collation handler */
};
/*
  utf16le_bin: collation number 62, flagged MY_CS_BINSORT,
  pad character ' ', 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16le_bin=
{
62,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf16le, charset_name_utf16le_length }, /* cs name */
{ STRING_WITH_LEN("utf16le_bin") }, /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16le_handler, /* charset handler */
&my_collation_utf16le_bin_handler /* collation handler */
};
/*
  utf16le_general_nopad_ci: collation number MY_NOPAD_ID(56),
  flagged MY_CS_NOPAD, 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16le_general_nopad_ci=
{
MY_NOPAD_ID(56),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
{ charset_name_utf16le, charset_name_utf16le_length }, /* cs name */
{ STRING_WITH_LEN("utf16le_general_nopad_ci") }, /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16le_handler, /* charset handler */
&my_collation_utf16le_general_nopad_ci_handler /* collation handler */
};
/*
  utf16le_nopad_bin: collation number MY_NOPAD_ID(62),
  flagged MY_CS_BINSORT and MY_CS_NOPAD, 2..4 bytes per character.
*/
struct charset_info_st my_charset_utf16le_nopad_bin=
{
MY_NOPAD_ID(62),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
{ charset_name_utf16le, charset_name_utf16le_length }, /* cs name */
{ STRING_WITH_LEN("utf16le_nopad_bin") }, /* name */
"UTF-16LE Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf16le_handler, /* charset handler */
&my_collation_utf16le_nopad_bin_handler /* collation handler */
};
#endif /* HAVE_CHARSET_utf16 */
#ifdef HAVE_CHARSET_utf32
#include "ctype-utf32.h"
/*
Check if b0 and b1 start a valid UTF32 four-byte sequence:
b0 must be zero and b1 must not exceed 0x10.
Characters greater than U+10FFFF are not accepted.
*/
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))
/* For strcoll.inl: only the first two bytes decide whether a character is valid */
#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1))
/*
  Weight of a utf32 character for the general_ci collations.

  Code points above 0xFFFF get MY_CS_REPLACEMENT_CHARACTER. Other code
  points get the "sort" value from my_unicase_default_pages, or the code
  point itself when the table has no page for it.
*/
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
                                             uchar b2, uchar b3)
{
  my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
  MY_UNICASE_CHARACTER *page;

  if (wc > 0xFFFF)
    return MY_CS_REPLACEMENT_CHARACTER;

  page= my_unicase_default_pages[wc >> 8];
  if (!page)
    return (int) wc;
  return (int) page[wc & 0xFF].sort;
}
/*
  Instantiate strcoll.inl for utf32_general_ci: weights come from
  my_weight_utf32_general_ci(). Only WEIGHT_MB4 is supplied for utf32.
  NOTE(review): the parameter macros are re-#defined before every #include,
  so strcoll.inl presumably #undef's them itself -- confirm.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf32_quick(pwc, s, e)
#define OPTIMIZE_ASCII 0
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 my_unicase_default_page00
#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.inl"
/*
  Instantiate strcoll.inl for utf32_bin: the weight is the value produced
  by MY_UTF32_WC4 from the four bytes.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.inl"
/* Instantiate strcoll.inl for utf32_general_nopad_ci */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_nopad_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.inl"
/* Instantiate strcoll.inl for utf32_nopad_bin */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_nopad_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.inl"
/* The character-recognition macros are not needed past this point */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
/*
  mb_wc for utf32: decode one character starting at s.
  Simply forwards to my_mb_wc_utf32_quick(); cs is not used.
*/
static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t *pwc, const uchar *s, const uchar *e)
{
  int rc= my_mb_wc_utf32_quick(pwc, s, e);
  return rc;
}
/*
  wc_mb for utf32: store code point wc at s as four bytes, most
  significant byte first.

  @return 4 - number of bytes written
  @return MY_CS_TOOSMALL4 - fewer than four bytes are available before e
  @return MY_CS_ILUNI - wc is greater than 0x10FFFF
*/
static int
my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t wc, uchar *s, uchar *e)
{
  /* The buffer check comes first, as callers may rely on its priority */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  if (wc > 0x10FFFF)
    return MY_CS_ILUNI;

  *s++= (uchar) (wc >> 24);
  *s++= (uchar) (wc >> 16) & 0xFF;
  *s++= (uchar) (wc >> 8) & 0xFF;
  *s= (uchar) wc & 0xFF;
  return 4;
}
/*
  Replace *wc with its lower-case mapping.
  *wc is left untouched when it is above uni_plane->maxchar or when the
  case table has no page for it.
*/
static inline void
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page= uni_plane->page[*wc >> 8];
    if (page)
      *wc= page[*wc & 0xFF].tolower;
  }
}
/*
  Replace *wc with its upper-case mapping.
  *wc is left untouched when it is above uni_plane->maxchar or when the
  case table has no page for it.
*/
static inline void
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page= uni_plane->page[*wc >> 8];
    if (page)
      *wc= page[*wc & 0xFF].toupper;
  }
}
/*
  Replace *wc with its sort weight.
  Characters above uni_plane->maxchar become MY_CS_REPLACEMENT_CHARACTER;
  characters whose page is missing from the table are left unchanged.
*/
static inline void
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;

  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }
  page= uni_plane->page[*wc >> 8];
  if (page)
    *wc= page[*wc & 0xFF].sort;
}
/*
  Length of ptr[0..length) in bytes after removing trailing four-byte
  groups that are exactly 00 00 00 ' '.
  length is expected to be a multiple of 4 (asserted in debug builds).
*/
static size_t
my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
                  const char *ptr, size_t length)
{
  const char *tail= ptr + length;

  DBUG_ASSERT((length % 4) == 0);

  for ( ; tail > ptr + 3; tail-= 4)
  {
    if (tail[-1] != ' ' || tail[-2] || tail[-3] || tail[-4])
      break;
  }
  return (size_t) (tail - ptr);
}
/*
  Convert src to upper case into dst, one character at a time.

  Conversion stops at the first character that my_utf32_uni() cannot
  decode, or for which my_uni_utf32() does not return the same length.

  @return srclen (always)
*/
static size_t
my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
                char *dst, size_t dstlen)
{
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  my_wc_t ch;
  int nbytes;

  DBUG_ASSERT(srclen <= dstlen);

  for ( ; src < src_end; src+= nbytes, dst+= nbytes)
  {
    nbytes= my_utf32_uni(cs, &ch, (uchar *)src, (uchar*) src_end);
    if (nbytes <= 0)
      break;
    my_toupper_utf32(casetab, &ch);
    if (my_uni_utf32(cs, ch, (uchar*) dst, (uchar*) dst_end) != nbytes)
      break;
  }
  return srclen;
}
/*
  Fold the sort weight of every character of s[0..slen) into the hash
  state *nr1 / *nr2, four bytes per character, most significant first.
  Trailing spaces are NOT removed.
  Hashing stops when my_utf32_uni() returns a non-positive value.
*/
static void
my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
                         ulong *nr1, ulong *nr2)
{
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const uchar *end= s + slen;
  register ulong h1= *nr1, h2= *nr2;
  my_wc_t ch;
  int nbytes;

  for ( ; ; s+= nbytes)
  {
    nbytes= my_utf32_uni(cs, &ch, (uchar*) s, (uchar*) end);
    if (nbytes <= 0)
      break;
    my_tosort_utf32(casetab, &ch);
    MY_HASH_ADD(h1, h2, (uint) (ch >> 24));
    MY_HASH_ADD(h1, h2, (uint) (ch >> 16) & 0xFF);
    MY_HASH_ADD(h1, h2, (uint) (ch >> 8) & 0xFF);
    MY_HASH_ADD(h1, h2, (uint) (ch & 0xFF));
  }
  *nr1= h1;
  *nr2= h2;
}
/*
  PAD SPACE hash: strip trailing spaces with my_lengthsp_utf32(),
  then delegate to my_hash_sort_utf32_nopad().
*/
static void
my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
                   ulong *nr1, ulong *nr2)
{
  my_hash_sort_utf32_nopad(cs, s,
                           my_lengthsp_utf32(cs, (const char *) s, slen),
                           nr1, nr2);
}
/*
  Convert src to lower case into dst, one character at a time.

  Conversion stops at the first character that my_utf32_uni() cannot
  decode (which includes reaching the end of src), or for which
  my_uni_utf32() does not return the same length.

  @return srclen (always)
*/
static size_t
my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
                char *dst, size_t dstlen)
{
  MY_UNICASE_INFO *casetab= cs->caseinfo;
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  my_wc_t ch;
  int nbytes;

  DBUG_ASSERT(srclen <= dstlen);

  for ( ; ; src+= nbytes, dst+= nbytes)
  {
    nbytes= my_utf32_uni(cs, &ch, (uchar*) src, (uchar*) src_end);
    if (nbytes <= 0)
      break;
    my_tolower_utf32(casetab,&ch);
    if (my_uni_utf32(cs, ch, (uchar*) dst, (uchar*) dst_end) != nbytes)
      break;
  }
  return srclen;
}
/*
  Length of the utf32 character starting at b.

  @return 4 - b[0], b[1] pass IS_UTF32_MBHEAD4
  @return MY_CS_TOOSMALL4 - fewer than four bytes are left before e
  @return MY_CS_ILSEQ - the first two bytes are not a valid head
*/
static int
my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
                 const uchar *b, const uchar *e)
{
  if (b + 4 > e)
    return MY_CS_TOOSMALL4;
  if (IS_UTF32_MBHEAD4(b[0], b[1]))
    return 4;
  return MY_CS_ILSEQ;
}
/*
  Instantiate ctype-mb.inl for utf32, with my_charlen_utf32() as the
  character length primitive. The parameter macros are removed again
  right after the include.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32
#define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.inl"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* The include above defines my_well_formed_char_length_utf32 */
/*
  Minimal printf-style formatter that produces UTF-32 output: every output
  character is written as the four bytes 00 00 00 <char>.

  Supported conversions: %s, %d and %u. Width/precision characters
  ('0'..'9', '.', '-') and one 'l' modifier are skipped. Note that the
  argument of %d/%u is always fetched as int, even after 'l'.
  Any other conversion, including "%%", produces a single '%'.

  The result is always terminated with four zero bytes. The last four
  bytes of the buffer are reserved for that terminator, so ordinary text
  can never push the terminator out of bounds.

  @param dst  destination buffer
  @param n    size of dst in bytes; expected to be a multiple of 4
  @param fmt  '\0'-terminated format string
  @param ap   arguments for the conversions

  @return number of bytes written, not counting the terminator;
          0 (nothing written) if n is less than 4
*/
static size_t
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
{
  char *start= dst, *end= dst + n;
  char *limit;                          /* End of the area usable for text */
  DBUG_ASSERT((n % 4) == 0);

  if (n < 4)
    return 0;                           /* No room even for the terminator */
  limit= end - 4;

  for (; *fmt ; fmt++)
  {
    if (fmt[0] != '%')
    {
      if ((size_t) (limit - dst) < 4)   /* End of buffer */
        break;

      *dst++= '\0';
      *dst++= '\0';
      *dst++= '\0';
      *dst++= *fmt;                     /* Copy ordinary char */
      continue;
    }

    fmt++;

    /* Skip if max size is used (to be compatible with printf) */
    while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
      fmt++;

    if (*fmt == 'l')
      fmt++;

    if (*fmt == 's')                    /* String parameter */
    {
      const char *par= va_arg(ap, char *);
      size_t plen;
      /* Whole characters that still fit in front of the terminator */
      size_t room= (size_t) (limit - dst) / 4;
      if (!par)
        par= "(null)";
      plen= strlen(par);
      if (plen > room)
        plen= room;                     /* Truncate silently */

      for ( ; plen ; plen--, dst+= 4, par++)
      {
        dst[0]= '\0';
        dst[1]= '\0';
        dst[2]= '\0';
        dst[3]= par[0];
      }
      continue;
    }
    else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
    {
      int iarg;
      char nbuf[16];
      char *pbuf= nbuf;

      /* 64 bytes cover all 16 characters of nbuf, terminator included */
      if ((size_t) (end - dst) < 64)
        break;
      iarg= va_arg(ap, int);
      if (*fmt == 'd')
        int10_to_str((long) iarg, nbuf, -10);
      else
        int10_to_str((long) (uint) iarg,nbuf,10);

      for (; pbuf[0]; pbuf++)
      {
        *dst++= '\0';
        *dst++= '\0';
        *dst++= '\0';
        *dst++= *pbuf;
      }
      continue;
    }

    /* We come here on '%%', unknown code or too long parameter */
    if ((size_t) (limit - dst) < 4)
      break;
    *dst++= '\0';
    *dst++= '\0';
    *dst++= '\0';
    *dst++= '%';                        /* % used as % or unknown code */

    /*
      A lone '%' at the very end of the format: stop here, otherwise the
      loop increment would step past the terminating '\0' of fmt.
    */
    if (!*fmt)
      break;
  }

  DBUG_ASSERT(dst < end);
  *dst++= '\0';
  *dst++= '\0';
  *dst++= '\0';
  *dst++= '\0';                         /* End of errmessage */
  return (size_t) (dst - start - 4);
}
/*
  Variadic front end of my_vsnprintf_utf32(); cs is not used.

  @return the value returned by my_vsnprintf_utf32()
*/
static size_t
my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
                  char* to, size_t n, const char* fmt, ...)
{
  va_list ap;
  size_t written;

  va_start(ap,fmt);
  written= my_vsnprintf_utf32(to, n, fmt, ap);
  va_end(ap);
  return written;
}
/*
  Convert a decimal number stored as utf32 text to longlong.

  Leading spaces and tabs are skipped and one optional sign is accepted.
  Parsing stops at the first character that is not a decimal digit.
  Digits are collected in three groups (i, j, k) to avoid 64-bit
  arithmetic for short numbers.

  @param cs      not used
  @param nptr    start of the string
  @param endptr  IN:  *endptr points to the end of the string.
                 OUT: *endptr points to the first character that was not
                      consumed (nptr itself if there was no conversion).
                 A NULL endptr is not supported: the call fails with
                 MY_ERRNO_EDOM and returns 0.
  @param error   OUT: 0 on success, -1 if the number is negative,
                 MY_ERRNO_ERANGE on overflow,
                 MY_ERRNO_EDOM if there was no number to convert.

  @return the converted value. On overflow LONGLONG_MIN is returned for
          negative input and (longlong) ULONGLONG_MAX otherwise.
*/
static longlong
my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
                   const char *nptr, char **endptr, int *error)
{
  const char *s, *end, *start, *n_end, *true_end;
  uchar c;
  unsigned long i, j, k;
  ulonglong li;
  int negative;
  ulong cutoff, cutoff2, cutoff3;

  s= nptr;
  /* If fixed length string */
  if (endptr)
  {
    /* Make sure string length is a multiple of 4 */
    end= s + ((*endptr - s) / 4) * 4;
    while (s < end && !s[0] && !s[1] && !s[2] &&
           (s[3] == ' ' || s[3] == '\t'))
      s+= 4;
    if (s == end)
      goto no_conv;
  }
  else
  {
    /* We don't support null terminated strings in UTF-32 */
    goto no_conv;
  }

  /* Check for a sign. */
  negative= 0;
  if (!s[0] && !s[1] && !s[2] && s[3] == '-')
  {
    *error= -1;                                 /* Mark as negative number */
    negative= 1;
    s+= 4;
    if (s == end)
      goto no_conv;
    cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
    cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
    cutoff3= MAX_NEGATIVE_NUMBER % 100;
  }
  else
  {
    *error= 0;
    if (!s[0] && !s[1] && !s[2] && s[3] == '+')
    {
      s+= 4;
      if (s == end)
        goto no_conv;
    }
    cutoff= ULONGLONG_MAX / LFACTOR2;
    cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
    cutoff3= ULONGLONG_MAX % 100;
  }

  /* Handle case where we have a lot of pre-zero */
  if (!s[0] && !s[1] && !s[2] && s[3] == '0')
  {
    i= 0;
    do
    {
      s+= 4;
      if (s == end)
        goto end_i;                             /* Return 0 */
    }
    while (!s[0] && !s[1] && !s[2] && s[3] == '0');
    n_end= s + 4 * INIT_CNT;
  }
  else
  {
    /* Read first digit to check that it's a valid number */
    if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
      goto no_conv;
    i= c;
    s+= 4;
    n_end= s + 4 * (INIT_CNT-1);
  }

  /* Handle first 9 digits and store them in i */
  if (n_end > end)
    n_end= end;
  for (; s != n_end ; s+= 4)
  {
    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
      goto end_i;
    i= i * 10 + c;
  }
  if (s == end)
    goto end_i;

  /* Handle next 9 digits and store them in j */
  j= 0;
  start= s;                             /* Used to know how much to shift i */
  n_end= true_end= s + 4 * INIT_CNT;
  if (n_end > end)
    n_end= end;
  do
  {
    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
      goto end_i_and_j;
    j= j * 10 + c;
    s+= 4;
  } while (s != n_end);
  if (s == end)
  {
    if (s != true_end)
      goto end_i_and_j;
    goto end3;
  }
  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
    goto end3;

  /* Handle the next 1 or 2 digits and store them in k */
  k=c;
  s+= 4;
  if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
    goto end4;
  k= k * 10 + c;
  s+= 4;
  *endptr= (char*) s;

  /* number string should have ended here */
  if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
    goto overflow;

  /*
    Check that we didn't get an overflow with the last digit.
    The (i, j, k) triple is compared lexicographically with the cutoff
    triple: a bigger j is an overflow whatever the value of k is.
  */
  if (i > cutoff || (i == cutoff && (j > cutoff2 ||
                                     (j == cutoff2 && k > cutoff3))))
    goto overflow;
  li= i * LFACTOR2+ (ulonglong) j * 100 + k;
  return (longlong) li;

overflow:                                       /* *endptr is set here */
  *error= MY_ERRNO_ERANGE;
  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;

end_i:
  *endptr= (char*) s;
  return (negative ? ((longlong) -(long) i) : (longlong) i);

end_i_and_j:
  li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
  *endptr= (char*) s;
  return (negative ? -((longlong) li) : (longlong) li);

end3:
  li= (ulonglong) i*LFACTOR+ (ulonglong) j;
  *endptr= (char*) s;
  return (negative ? -((longlong) li) : (longlong) li);

end4:
  li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
  *endptr= (char*) s;
  if (negative)
  {
    if (li > MAX_NEGATIVE_NUMBER)
      goto overflow;
    return -((longlong) li);
  }
  return (longlong) li;

no_conv:
  /* There was no number to convert. */
  *error= MY_ERRNO_EDOM;
  if (endptr)                           /* NULL when the caller passed none */
    *endptr= (char *) nptr;
  return 0;
}
/*
  Number of characters in [b, e): the byte length divided by four.
*/
static size_t
my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
                  const char *b, const char *e)
{
  size_t nbytes= (size_t) (e - b);
  return nbytes / 4;
}
/*
  Byte offset of the character number pos inside [b, e).

  @return pos * 4, or (string length + 4), i.e. a position outside the
          string, when pos * 4 is greater than the string length.
*/
static size_t
my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
                 const char *b, const char *e, size_t pos)
{
  size_t nbytes= (size_t) (e - b);
  size_t offset= pos * 4;

  if (offset > nbytes)
    return nbytes + 4;
  return offset;
}
/*
  Fill s[0..slen) with repeated copies of the character "fill".

  The character is encoded once with my_ci_wc_mb() into a local buffer and
  the first four bytes of that buffer are then copied in steps of four.
  slen is expected to be a multiple of 4 (asserted in debug builds only).
*/
static
void my_fill_utf32(CHARSET_INFO *cs,
char *s, size_t slen, int fill)
{
char buf[10];
#ifdef DBUG_ASSERT_EXISTS
/* The encoded length is only kept when it can be asserted on */
uint buflen;
#endif
char *e= s + slen;
DBUG_ASSERT((slen % 4) == 0);
/* The optional "buflen=" below and the call after it form ONE statement */
#ifdef DBUG_ASSERT_EXISTS
buflen=
#endif
my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf, (uchar*) buf + sizeof(buf));
DBUG_ASSERT(buflen == 4);
while (s < e)
{
memcpy(s, buf, 4);
s+= 4;
}
}
/*
  Wildcard (LIKE) comparison for case-insensitive utf32 collations:
  my_wildcmp_unicode() with the case table of cs.
*/
static int
my_wildcmp_utf32_ci(CHARSET_INFO *cs,
                    const char *str, const char *str_end,
                    const char *wildstr, const char *wildend,
                    int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, cs->caseinfo);
}
/*
  Wildcard (LIKE) comparison for binary utf32 collations:
  my_wildcmp_unicode() without a case table (NULL).
*/
static int
my_wildcmp_utf32_bin(CHARSET_INFO *cs,
                     const char *str,const char *str_end,
                     const char *wildstr,const char *wildend,
                     int escape, int w_one, int w_many)
{
  int rc= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                             escape, w_one, w_many, NULL);
  return rc;
}
/*
  Scan a sequence of characters of the given type at the start of
  [str, end).

  MY_SEQ_SPACES:    returns the byte length of the leading run of ' '.
  MY_SEQ_NONSPACES: not implemented (asserts in debug builds), returns 0.
  Anything else:    returns 0.
*/
static size_t
my_scan_utf32(CHARSET_INFO *cs,
              const char *str, const char *end, int sequence_type)
{
  const char *cur= str;

  if (sequence_type == MY_SEQ_SPACES)
  {
    while (cur < end)
    {
      my_wc_t wc;
      int res= my_utf32_uni(cs, &wc, (uchar*) cur, (uchar*) end);
      if (res < 0 || wc != ' ')
        break;
      cur+= res;
    }
    return (size_t) (cur - str);
  }

  if (sequence_type == MY_SEQ_NONSPACES)
  {
    DBUG_ASSERT(0); /* Not implemented */
  }
  return 0;
}
/*
  Collation handler table for utf32_general_ci
  (referenced by my_charset_utf32_general_ci).
  Uses the *_utf32_general_ci comparison/transform functions, the
  case-aware wildcard matcher and the non-"nopad" hash function.
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
{
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_ci,
my_strnncollsp_nchars_utf32_general_ci,
my_strnxfrm_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf32_bin
  (referenced by my_charset_utf32_bin).
  Uses the *_utf32_bin comparison functions, the "full_bin" Unicode
  transform functions and the wildcard matcher without case information.
*/
static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
{
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_bin,
my_strnncollsp_nchars_utf32_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf32_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf32_general_nopad_ci
  (referenced by my_charset_utf32_general_nopad_ci).
  Differs from the utf32_general_ci table in the strnncollsp,
  strnncollsp_nchars, strnxfrm, hash and min_str entries, which are the
  "nopad" variants; strnncoll is shared with utf32_general_ci.
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
{
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_nopad_ci,
my_strnncollsp_nchars_utf32_general_nopad_ci,
my_strnxfrm_nopad_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32_nopad,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for utf32_nopad_bin
  (referenced by my_charset_utf32_nopad_bin).
  Differs from the utf32_bin table in the strnncollsp, strnncollsp_nchars,
  strnxfrm, hash and min_str entries, which are the "nopad" variants;
  strnncoll is shared with utf32_bin.
*/
static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
{
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_nopad_bin,
my_strnncollsp_nchars_utf32_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf32_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32_nopad,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Character set handler table shared by all utf32 collations defined in
  this file (every utf32 charset_info_st below points to it).
  Entries mix utf32-specific functions with the generic mb2_or_mb4 helpers.
  Note that my_uni_utf32 is listed twice: once right after my_utf32_uni
  and once more near the end of the table.
*/
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
NULL, /* init */
my_numchars_utf32,
my_charpos_utf32,
my_lengthsp_utf32,
my_numcells_mb,
my_utf32_uni, /* mb_wc, by analogy with my_charset_ucs2_handler */
my_uni_utf32, /* wc_mb, by analogy with my_charset_ucs2_handler */
my_mb_ctype_mb,
my_caseup_str_mb2_or_mb4,
my_casedn_str_mb2_or_mb4,
my_caseup_utf32,
my_casedn_utf32,
my_snprintf_utf32,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_utf32,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
my_strntoull_mb2_or_mb4,
my_strntod_mb2_or_mb4,
my_strtoll10_utf32,
my_strntoull10rnd_mb2_or_mb4,
my_scan_utf32,
my_charlen_utf32,
my_well_formed_char_length_utf32,
my_copy_fix_mb2_or_mb4,
my_uni_utf32,
my_wc_to_printable_generic
};
/*
  utf32_general_ci: collation number 60, flagged MY_CS_PRIMARY.
  Fixed-width encoding (mbminlen == mbmaxlen == 4), pad character ' ',
  case information taken from my_unicase_default.
*/
struct charset_info_st my_charset_utf32_general_ci=
{
60,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
{ STRING_WITH_LEN("utf32_general_ci") }, /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
4, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf32_handler,
&my_collation_utf32_general_ci_handler
};
/*
  utf32_bin: collation number 61, flagged MY_CS_BINSORT.
  Same layout as utf32_general_ci but wired to the utf32 bin
  collation handler.
*/
struct charset_info_st my_charset_utf32_bin=
{
61,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
{ STRING_WITH_LEN("utf32_bin") }, /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
4, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf32_handler,
&my_collation_utf32_bin_handler
};
/*
  utf32_general_nopad_ci: number derived from 60 through MY_NOPAD_ID(),
  flagged MY_CS_NOPAD (and not MY_CS_PRIMARY). Wired to the
  utf32_general_nopad_ci collation handler.
*/
struct charset_info_st my_charset_utf32_general_nopad_ci=
{
MY_NOPAD_ID(60),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
{ STRING_WITH_LEN("utf32_general_nopad_ci") }, /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
4, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf32_handler,
&my_collation_utf32_general_nopad_ci_handler
};
/*
  utf32_nopad_bin: number derived from 61 through MY_NOPAD_ID(),
  flagged MY_CS_BINSORT and MY_CS_NOPAD. Wired to the utf32_nopad_bin
  collation handler.
*/
struct charset_info_st my_charset_utf32_nopad_bin=
{
MY_NOPAD_ID(61),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
MY_CS_NOPAD,
{ charset_name_utf32, charset_name_utf32_length }, /* cs name */
{ STRING_WITH_LEN("utf32_nopad_bin") }, /* name */
"UTF-32 Unicode", /* comment */
NULL, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
4, /* mbminlen */
4, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_utf32_handler,
&my_collation_utf32_nopad_bin_handler
};
#endif /* HAVE_CHARSET_utf32 */
#ifdef HAVE_CHARSET_ucs2
#include "ctype-ucs2.h"
/*
  Character-class table used as the "ctype" member of the ucs2
  charset_info_st definitions in this file.
  257 entries: a leading 0 followed by one flag byte for each code 0..255.
  Codes 128..255 carry no flags (all zero).
  NOTE(review): the numeric flag values presumably follow the ctype bit
  masks declared in the project headers -- confirm there.
*/
static const uchar ctype_ucs2[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
  Single-byte lower-case map (256 entries, indexed by byte value).
  Identity everywhere except 65..90 ('A'..'Z'), which map to
  97..122 ('a'..'z').
*/
static const uchar to_lower_ucs2[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
/*
  Single-byte upper-case map (256 entries, indexed by byte value).
  Identity everywhere except 97..122 ('a'..'z'), which map to
  65..90 ('A'..'Z'). Also used as the "sort_order" member of the
  ucs2 general_ci charset definitions in this file.
*/
static const uchar to_upper_ucs2[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
/* Definitions for strcoll.inl */
/* Always true: no byte pair is rejected by this predicate */
#define IS_MB2_CHAR(x,y) (1)
/* Combine two bytes into one code value: b0 is the high byte, b1 the low */
#define UCS2_CODE(b0,b1) (((uchar) b0) << 8 | ((uchar) b1))
/*
  Sort weight of the 2-byte character (b0, b1) for ucs2_general_ci.
  Looks the code up in my_unicase_default_pages and returns the "sort"
  value of its entry; when the page is absent the code itself is the weight.
*/
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
{
  my_wc_t code= UCS2_CODE(b0, b1);
  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[code >> 8];
  my_wc_t weight= code;
  if (page)
    weight= page[code & 0xFF].sort;
  return (int) weight;
}
/*
  Instantiate strcoll.inl for ucs2_general_ci.
  Generated names have the form my_<x>_ucs2_general_ci; 2-byte characters
  are weighted through my_weight_mb2_ucs2_general_ci(). Both the padded
  and the NOPAD Unicode strnxfrm variants are requested.
  NOTE(review): strcoll.inl presumably #undef's these macros at its end,
  since they are redefined for the next instantiation -- confirm there.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII 0
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 my_unicase_default_page00
#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.inl"
/*
  Instantiate strcoll.inl for ucs2_bin.
  Generated names have the form my_<x>_ucs2_bin; the weight of a 2-byte
  character is its code value (UCS2_CODE), i.e. no case folding.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
#define DEFINE_STRNXFRM_UNICODE_BIN2
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII 0
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
#include "strcoll.inl"
/*
  Instantiate strcoll.inl for ucs2_general_nopad_ci with
  DEFINE_STRNNCOLLSP_NOPAD set. Generated names have the form
  my_<x>_ucs2_general_nopad_ci; weights are the same as for
  ucs2_general_ci.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_nopad_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.inl"
/*
  Instantiate strcoll.inl for ucs2_nopad_bin with
  DEFINE_STRNNCOLLSP_NOPAD set. Generated names have the form
  my_<x>_ucs2_nopad_bin; weights are the raw code values, as for ucs2_bin.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_nopad_bin
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
#include "strcoll.inl"
/*
  Length in bytes of the character starting at s: always 2, or
  MY_CS_TOOSMALLN(2) when fewer than 2 bytes remain before e.
*/
static int
my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                const uchar *s, const uchar *e)
{
  if (s + 2 > e)
    return MY_CS_TOOSMALLN(2);
  return 2;
}
/*
  Decode one ucs2 character from [s, e) into *pwc.
  Thin wrapper around my_mb_wc_ucs2_quick(); its result is returned as is.
*/
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
                       my_wc_t * pwc, const uchar *s, const uchar *e)
{
  int rc= my_mb_wc_ucs2_quick(pwc, s, e);
  return rc;
}
/*
  Encode the code point wc as two bytes (high byte first) at r.

  Returns 2 on success, MY_CS_TOOSMALL2 when fewer than 2 bytes are
  available before e, and MY_CS_ILUNI when wc is above 0xFFFF.
  The buffer-size check takes precedence over the range check.
*/
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
                       my_wc_t wc, uchar *r, uchar *e)
{
  if (r + 2 > e)
  {
    return MY_CS_TOOSMALL2;
  }
  else if (wc > 0xFFFF)
  {
    /* UCS2 does not support characters outside BMP */
    return MY_CS_ILUNI;
  }

  r[1]= (uchar) (wc & 0xFF);
  r[0]= (uchar) (wc >> 8);
  return 2;
}
/*
  Replace *wc with the "tolower" value of its entry in uni_plane.
  *wc is left untouched when the page selected by bits 8..15 is absent.
*/
static inline void
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page= uni_plane->page[(*wc >> 8) & 0xFF];
  if (!page)
    return;
  *wc= page[*wc & 0xFF].tolower;
}
/*
  Replace *wc with the "toupper" value of its entry in uni_plane.
  *wc is left untouched when the page selected by bits 8..15 is absent.
*/
static inline void
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page= uni_plane->page[(*wc >> 8) & 0xFF];
  if (!page)
    return;
  *wc= page[*wc & 0xFF].toupper;
}
/*
  Replace *wc with the "sort" value of its entry in uni_plane.
  *wc is left untouched when the page selected by bits 8..15 is absent.
*/
static inline void
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page= uni_plane->page[(*wc >> 8) & 0xFF];
  if (!page)
    return;
  *wc= page[*wc & 0xFF].sort;
}
/*
  Convert the ucs2 string [src, src + srclen) to upper case into dst.

  Characters are decoded one at a time, mapped through cs->caseinfo and
  re-encoded. Conversion stops at the first character that cannot be
  decoded or whose encoded length differs from its source length.
  The function always returns srclen.
*/
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  MY_UNICASE_INFO *caseinfo= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < src_end)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) src_end);
    if (nbytes <= 0)
      break;
    my_toupper_ucs2(caseinfo, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dst_end) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
/*
  Hash the ucs2 string [s, s + slen) into *nr1 / *nr2 without stripping
  anything from the input. Each decoded character is mapped through
  my_tosort_ucs2() and then mixed in with MY_HASH_ADD_16().
  Hashing stops at the first character that cannot be decoded.
*/
static void
my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
                        ulong *nr1, ulong *nr2)
{
  const uchar *end= s + slen;
  MY_UNICASE_INFO *caseinfo= cs->caseinfo;
  register ulong m1= *nr1, m2= *nr2;

  while (s < end)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar *)s, (uchar*)end);
    if (nbytes <= 0)
      break;
    my_tosort_ucs2(caseinfo, &wc);
    MY_HASH_ADD_16(m1, m2, wc);
    s+= nbytes;
  }

  *nr1= m1;
  *nr2= m2;
}
/*
  Hash a ucs2 string after shortening it to the length reported by
  my_lengthsp_mb2(); the shortened string is passed to
  my_hash_sort_ucs2_nopad().
*/
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
                              ulong *nr1, ulong *nr2)
{
  my_hash_sort_ucs2_nopad(cs, s,
                          my_lengthsp_mb2(cs, (const char *) s, slen),
                          nr1, nr2);
}
/*
  Convert the ucs2 string [src, src + srclen) to lower case into dst.

  Characters are decoded one at a time, mapped through cs->caseinfo and
  re-encoded. Conversion stops at the first character that cannot be
  decoded or whose encoded length differs from its source length.
  The function always returns srclen.
*/
static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *src_end= src + srclen;
  char *dst_end= dst + dstlen;
  MY_UNICASE_INFO *caseinfo= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < src_end)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) src_end);
    if (nbytes <= 0)
      break;
    my_tolower_ucs2(caseinfo, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dst_end) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
/*
  Fill the buffer [s, s + l) with the 2-byte character "fill", high byte
  first. A trailing odd byte, if any, is left untouched.
  fill is asserted not to exceed 0xFFFF.
*/
static void
my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
             char *s, size_t l, int fill)
{
  DBUG_ASSERT(fill <= 0xFFFF);
#ifdef WAITING_FOR_GCC_VECTORIZATION_BUG_TO_BE_FIXED
  /*
    This code with int2store() is known to be faster on some processors,
    but crashes on other processors due to a possible bug in GCC's
    -ftree-vectorization (which is enabled in -O3) in case of
    a non-aligned memory. See here for details:
    http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58039
  */
  char *last= s + l - 2;
  uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */
  DBUG_ASSERT(fill <= 0xFFFF);
  for ( ; s <= last; s+= 2)
    int2store(s, tmp); /* store little-endian */
#else
  /* Plain byte-wise store, two bytes per iteration */
  while (l >= 2)
  {
    s[0]= (fill >> 8);
    s[1]= (fill & 0xFF);
    s+= 2;
    l-= 2;
  }
#endif
}
/*
  Number of whole 2-byte units in the byte range [b, e).
  A trailing odd byte is not counted.
*/
static
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                        const char *b, const char *e)
{
  size_t nbytes= (size_t) (e - b);
  return nbytes / 2;
}
/*
  Convert a character position into a byte offset inside [b, e).

  @param cs   Character set (not used)
  @param b    Start of the string
  @param e    End of the string
  @param pos  Position, counted in 2-byte characters

  @return pos * 2 when that offset does not exceed the byte length of the
          string; otherwise string_length + 2, i.e. a value beyond the end.

  The bound is tested in character units, the same rule my_charpos_utf32()
  applies with a factor of 4. The previous test compared the character
  count "pos" directly with the byte length. b and e are no longer marked
  "unused", because both are read.
*/
static
size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                       const char *b,
                       const char *e,
                       size_t pos)
{
  size_t string_length= (size_t) (e - b);
  /* Dividing instead of multiplying keeps the test free of overflow */
  return pos > string_length / 2 ? string_length + 2 : pos * 2;
}
/*
  Determine how many of the requested nchars characters are available
  in [b, e) and record the scan result in *status.

  - If at least nchars characters fit, m_source_end_pos is set right after
    the nchars-th character, no error position is recorded and nchars is
    returned.
  - Otherwise length / 2 is returned. For an odd byte length both
    m_well_formed_error_pos and m_source_end_pos point at the last byte
    (e - 1); for an even length there is no error and m_source_end_pos is e.
*/
static size_t
my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                                const char *b, const char *e,
                                size_t nchars, MY_STRCOPY_STATUS *status)
{
  size_t nbytes= e - b;

  if (nchars * 2 > nbytes)
  {
    /* The string is shorter than requested */
    if (nbytes % 2)
    {
      status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
    }
    else
    {
      status->m_well_formed_error_pos= NULL;
      status->m_source_end_pos= e;
    }
    return nbytes / 2;
  }

  status->m_well_formed_error_pos= NULL;
  status->m_source_end_pos= b + (nchars * 2);
  return nchars;
}
/*
  Wildcard comparison for ucs2 using the collation's case information:
  delegates to my_wildcmp_unicode() with cs->caseinfo.
*/
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                       const char *str,const char *str_end,
                       const char *wildstr,const char *wildend,
                       int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, cs->caseinfo);
}
/*
  Wildcard comparison for ucs2 without case information:
  delegates to my_wildcmp_unicode() passing NULL as the case info.
*/
static
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
                        const char *str,const char *str_end,
                        const char *wildstr,const char *wildend,
                        int escape, int w_one, int w_many)
{
  int rc= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                             escape, w_one, w_many, NULL);
  return rc;
}
/*
  Hash the bytes [key, key + len) into *nr1 / *nr2, one byte at a time
  via MY_HASH_ADD(), without stripping anything from the input.
*/
static void
my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
                            const uchar *key, size_t len,
                            ulong *nr1, ulong *nr2)
{
  const uchar *stop= key + len;
  register ulong m1= *nr1, m2= *nr2;

  while (key < stop)
  {
    MY_HASH_ADD(m1, m2, (uint)*key);
    key++;
  }

  *nr1= m1;
  *nr2= m2;
}
/*
  Binary hash of a ucs2 string after shortening it to the length reported
  by my_lengthsp_mb2(); the shortened string is passed to
  my_hash_sort_ucs2_nopad_bin().
*/
static void
my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
                      const uchar *key, size_t len, ulong *nr1, ulong *nr2)
{
  my_hash_sort_ucs2_nopad_bin(cs, key,
                              my_lengthsp_mb2(cs, (const char *) key, len),
                              nr1, nr2);
}
/*
  Collation handler table for ucs2_general_ci. Referenced by both
  my_charset_ucs2_general_ci and my_charset_ucs2_general_mysql500_ci.
  Uses the my_*_ucs2_general_ci functions, the case-aware wildcard
  matcher and my_hash_sort_ucs2.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_ci,
my_strnncollsp_nchars_ucs2_general_ci,
my_strnxfrm_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for ucs2_bin
  (referenced by my_charset_ucs2_bin).
  Uses the my_*_ucs2_bin functions, the wildcard matcher without case
  information and my_hash_sort_ucs2_bin.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
my_strnncollsp_nchars_ucs2_bin,
my_strnxfrm_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_bin,
my_propagate_simple,
my_min_str_mb_simple,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for ucs2_general_nopad_ci
  (referenced by my_charset_ucs2_general_nopad_ci).
  Differs from the ucs2_general_ci table in the strnncollsp,
  strnncollsp_nchars, strnxfrm, hash and min_str entries, which are the
  "nopad" variants; strnncoll is shared with ucs2_general_ci.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
{
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_nopad_ci,
my_strnncollsp_nchars_ucs2_general_nopad_ci,
my_strnxfrm_nopad_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_nopad,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Collation handler table for ucs2_nopad_bin
  (referenced by my_charset_ucs2_nopad_bin).
  Differs from the ucs2_bin table in the strnncollsp, strnncollsp_nchars,
  strnxfrm, hash and min_str entries, which are the "nopad" variants;
  strnncoll is shared with ucs2_bin.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
{
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_nopad_bin,
my_strnncollsp_nchars_ucs2_nopad_bin,
my_strnxfrm_nopad_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_nopad_bin,
my_propagate_simple,
my_min_str_mb_simple_nopad,
my_max_str_mb_simple,
my_ci_get_id_generic,
my_ci_get_collation_name_generic
};
/*
  Character set handler table shared by all ucs2 collations defined in
  this file (every ucs2 charset_info_st below points to it).
  Entries mix ucs2-specific functions with the generic mb2 and
  mb2_or_mb4 helpers. Note that my_uni_ucs2 is listed twice: once as
  wc_mb and once more near the end of the table.
*/
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
NULL, /* init */
my_numchars_ucs2,
my_charpos_ucs2,
my_lengthsp_mb2,
my_numcells_mb,
my_ucs2_uni, /* mb_wc */
my_uni_ucs2, /* wc_mb */
my_mb_ctype_mb,
my_caseup_str_mb2_or_mb4,
my_casedn_str_mb2_or_mb4,
my_caseup_ucs2,
my_casedn_ucs2,
my_snprintf_mb2,
my_l10tostr_mb2_or_mb4,
my_ll10tostr_mb2_or_mb4,
my_fill_ucs2,
my_strntol_mb2_or_mb4,
my_strntoul_mb2_or_mb4,
my_strntoll_mb2_or_mb4,
my_strntoull_mb2_or_mb4,
my_strntod_mb2_or_mb4,
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_charlen_ucs2,
my_well_formed_char_length_ucs2,
my_copy_fix_mb2_or_mb4,
my_uni_ucs2,
my_wc_to_printable_generic
};
/*
  ucs2_general_ci: collation number 35, flagged MY_CS_PRIMARY.
  Fixed-width encoding (mbminlen == mbmaxlen == 2), pad character ' ',
  case information taken from my_unicase_default; to_upper_ucs2 doubles
  as the sort_order table.
*/
struct charset_info_st my_charset_ucs2_general_ci=
{
35,0,0, /* number */
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_ucs2, charset_name_ucs2_length}, /* cs name */
{ STRING_WITH_LEN("ucs2_general_ci") }, /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_ucs2_handler,
&my_collation_ucs2_general_ci_handler
};
/*
  ucs2_general_mysql500_ci: collation number 159, not flagged
  MY_CS_PRIMARY. Shares both handlers with ucs2_general_ci; the
  difference visible here is the case information table,
  my_unicase_mysql500 instead of my_unicase_default.
*/
struct charset_info_st my_charset_ucs2_general_mysql500_ci=
{
159, 0, 0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
{ STRING_WITH_LEN("ucs2_general_mysql500_ci") }, /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_ucs2_handler,
&my_collation_ucs2_general_ci_handler
};
/*
  ucs2_bin: collation number 90, flagged MY_CS_BINSORT.
  No sort_order table; wired to the ucs2 bin collation handler.
  NOTE(review): unlike the other collations in this file, the flags do
  not include MY_CS_STRNXFRM -- confirm this is intentional.
*/
struct charset_info_st my_charset_ucs2_bin=
{
90,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
{ STRING_WITH_LEN("ucs2_bin") }, /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_ucs2_handler,
&my_collation_ucs2_bin_handler
};
/*
  ucs2_general_nopad_ci: number derived from 35 through MY_NOPAD_ID(),
  flagged MY_CS_NOPAD (and not MY_CS_PRIMARY). Wired to the
  ucs2_general_nopad_ci collation handler.
*/
struct charset_info_st my_charset_ucs2_general_nopad_ci=
{
MY_NOPAD_ID(35),0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
{ STRING_WITH_LEN("ucs2_general_nopad_ci") }, /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_ucs2_handler,
&my_collation_ucs2_general_nopad_ci_handler
};
/*
  ucs2_nopad_bin: number derived from 90 through MY_NOPAD_ID(),
  flagged MY_CS_BINSORT and MY_CS_NOPAD. No sort_order table; wired to
  the ucs2_nopad_bin collation handler.
*/
struct charset_info_st my_charset_ucs2_nopad_bin=
{
MY_NOPAD_ID(90),0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
{ charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
{ STRING_WITH_LEN("ucs2_nopad_bin") }, /* name */
"", /* comment */
NULL, /* tailoring */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
MY_CS_COLL_LEVELS_S1,
&my_charset_ucs2_handler,
&my_collation_ucs2_nopad_bin_handler
};
#endif /* HAVE_CHARSET_ucs2 */