mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
UCS2 charset has been added
This commit is contained in:
parent
16c2fdffc0
commit
2ec3617476
6 changed files with 384 additions and 2 deletions
|
@ -94,6 +94,7 @@
|
|||
#undef HAVE_CHARSET_sjis
|
||||
#undef HAVE_CHARSET_swe7
|
||||
#undef HAVE_CHARSET_tis620
|
||||
#undef HAVE_CHARSET_ucs2
|
||||
#undef HAVE_CHARSET_ujis
|
||||
#undef HAVE_CHARSET_usa7
|
||||
#undef HAVE_CHARSET_utf8
|
||||
|
|
|
@ -1942,10 +1942,10 @@ AC_DIVERT_PUSH(0)
|
|||
CHARSETS_AVAILABLE="armscii8 big5 cp1251 cp1257
|
||||
croat czech danish dec8 dos estonia euc_kr gb2312 gbk
|
||||
german1 greek hebrew hp8 hungarian koi8_ru koi8_ukr
|
||||
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
|
||||
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ucs2 ujis
|
||||
usa7 utf8 win1250 win1250ch win1251ukr"
|
||||
CHARSETS_DEPRECATED="win1251"
|
||||
CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ujis utf8 win1250ch"
|
||||
CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ucs2 ujis utf8 win1250ch"
|
||||
DEFAULT_CHARSET=latin1
|
||||
AC_DIVERT_POP
|
||||
|
||||
|
@ -2067,6 +2067,10 @@ do
|
|||
tis620)
|
||||
AC_DEFINE(HAVE_CHARSET_tis620)
|
||||
;;
|
||||
ucs2)
|
||||
AC_DEFINE(HAVE_CHARSET_ucs2)
|
||||
use_mb="yes"
|
||||
;;
|
||||
ujis)
|
||||
AC_DEFINE(HAVE_CHARSET_ujis)
|
||||
use_mb="yes"
|
||||
|
|
|
@ -287,6 +287,40 @@ void my_hash_sort_utf8(struct charset_info_st *cs, const uchar *key, uint len, u
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_ucs2

/* Per-byte classification and case-mapping tables of the ucs2 charset. */
extern uchar ctype_ucs2[];
extern uchar to_lower_ucs2[];
extern uchar to_upper_ucs2[];

/* Collation: compare two length-delimited strings / build a sort key. */
int my_strnncoll_ucs2(CHARSET_INFO *cs,
                      const uchar *s, uint s_len,
                      const uchar *t, uint t_len);
int my_strnxfrm_ucs2(CHARSET_INFO *cs,
                     uchar *dest, uint destlen,
                     const uchar *src, uint srclen);

/* Multi-byte character probes. */
int     my_ismbchar_ucs2(CHARSET_INFO *cs, const char *b, const char *e);
my_bool my_ismbhead_ucs2(CHARSET_INFO *cs, uint ch);
int     my_mbcharlen_ucs2(CHARSET_INFO *cs, uint c);

/* In-place case conversion (NUL-terminated and length-delimited forms). */
void my_caseup_str_ucs2(CHARSET_INFO *cs, char *s);
void my_casedn_str_ucs2(CHARSET_INFO *cs, char *s);
void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint len);
void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint len);

/* Case-insensitive comparison. */
int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t);
int my_strncasecmp_ucs2(CHARSET_INFO *cs,
                        const char *s, const char *t, uint l);

/* Conversion between a two-byte UCS-2 sequence and a wide character. */
int my_ucs2_uni(CHARSET_INFO *cs, my_wc_t *p, const uchar *s, const uchar *e);
int my_uni_ucs2(CHARSET_INFO *cs, my_wc_t wc, uchar *b, uchar *e);

/* Hashing. */
uint my_hash_caseup_ucs2(struct charset_info_st *cs,
                         const byte *key, uint len);
void my_hash_sort_ucs2(struct charset_info_st *cs,
                       const uchar *key, uint len,
                       ulong *nr1, ulong *nr2);

#endif
|
||||
|
||||
|
||||
#define _U 01 /* Upper case */
|
||||
#define _L 02 /* Lower case */
|
||||
#define _NMR 04 /* Numeral (digit) */
|
||||
|
|
|
@ -39,3 +39,4 @@ latin1_de 31
|
|||
armscii8 32
|
||||
utf8 33
|
||||
win1250ch 34
|
||||
ucs2 35
|
||||
|
|
|
@ -25,6 +25,10 @@
|
|||
#define HAVE_UNIDATA
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CHARSET_ucs2
|
||||
#define HAVE_UNIDATA
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_UNIDATA
|
||||
|
||||
static MY_UNICASE_INFO plane00[]={
|
||||
|
@ -1999,4 +2003,309 @@ int main()
|
|||
|
||||
#endif
|
||||
|
||||
#endif /* HAVE_CHARSET_UTF8 */
|
||||
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_ucs2
|
||||
|
||||
/*
  Character-class flags for the ucs2 charset: one leading entry followed by
  one entry for each byte value 0x00..0xFF (257 entries in total).
  Only 0x00..0x7F carry flags; 0x80..0xFF are all zero.
  NOTE(review): the values look like a bit mask built from the _U/_L/_NMR/...
  ctype flags, and the leading 0 looks like a slot for index -1 -- confirm
  against the ctype macros.
*/
uchar ctype_ucs2[] = {
    0,                                                                /* leading entry */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,   /* 0x00 */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,   /* 0x10 */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,   /* 0x20 */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,   /* 0x30 */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x40 */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,   /* 0x50 */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,   /* 0x60 */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,   /* 0x70 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x80 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x90 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xA0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xB0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xC0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xD0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xE0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    /* 0xF0 */
};
|
||||
|
||||
/*
  Byte-wise lower-case map for the ucs2 charset (256 entries).
  Only 'A'..'Z' (65..90) are changed, to 'a'..'z' (97..122); every other
  byte value maps to itself.
*/
uchar to_lower_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,   /* 0x40 */
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,   /* 0x60 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,   /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,   /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,   /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255    /* 0xF0 */
};
|
||||
|
||||
/*
  Byte-wise upper-case map for the ucs2 charset (256 entries).
  Only 'a'..'z' (97..122) are changed, to 'A'..'Z' (65..90); every other
  byte value maps to itself.
*/
uchar to_upper_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,   /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,   /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,   /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255    /* 0xF0 */
};
|
||||
|
||||
|
||||
int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
my_wc_t * pwc, const uchar *s, const uchar *e)
|
||||
{
|
||||
if (s+2 > e) /* Need 2 characters */
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
*pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]);
|
||||
return 2;
|
||||
}
|
||||
|
||||
int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
my_wc_t wc, uchar *r, uchar *e)
|
||||
{
|
||||
if ( r+2 > e )
|
||||
return MY_CS_TOOSMALL;
|
||||
|
||||
r[0]=wc >> 8;
|
||||
r[1]=wc & 0xFF;
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
||||
/*
  Upper-case a length-delimited UCS-2 buffer in place.

  Each character is decoded, mapped through the .toupper member of its
  uni_plane[] page (pages with no table are left unchanged) and written back
  over the original bytes. Conversion stops silently at the first character
  that cannot be decoded or re-encoded, e.g. a trailing odd byte.
*/
void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
{
  char *end= s + slen;

  while (s < end)
  {
    my_wc_t ch;
    int     page;
    int     nread= my_ucs2_uni(cs, &ch, (uchar *) s, (uchar *) end);

    if (nread <= 0)
      break;

    page= (ch >> 8) & 0xFF;
    if (uni_plane[page])
      ch= uni_plane[page][ch & 0xFF].toupper;

    if (my_uni_ucs2(cs, ch, (uchar *) s, (uchar *) end) != nread)
      break;
    s+= nread;
  }
}
|
||||
|
||||
/*
  Case-insensitive hash of a length-delimited UCS-2 key.

  Every character is upper-cased through uni_plane[] and then mixed into the
  hash one byte at a time, low byte first and high byte second. Hashing stops
  at the first character that cannot be decoded (e.g. a trailing odd byte).

  Returns the accumulated hash value; the seed is 1.
*/
uint my_hash_caseup_ucs2(CHARSET_INFO *cs, const byte *s, uint slen)
{
  const byte *end= s + slen;
  uint hash= 1;
  uint step= 4;

  while (s < end)
  {
    my_wc_t ch;
    int     page;
    int     nread= my_ucs2_uni(cs, &ch, (const uchar *) s, (const uchar *) end);

    if (nread <= 0)
      break;

    page= (ch >> 8) & 0xFF;
    if (uni_plane[page])
      ch= uni_plane[page][ch & 0xFF].toupper;

    /* low byte */
    hash^= (((hash & 63) + step) * (ch & 0xFF)) + (hash << 8);
    step+= 3;
    /* high byte */
    hash^= (((hash & 63) + step) * (ch >> 8)) + (hash << 8);
    step+= 3;

    s+= nread;
  }

  return hash;
}
|
||||
|
||||
|
||||
/*
  Collation-aware hash of a length-delimited UCS-2 key.

  Every character is replaced by the .sort weight of its uni_plane[] page
  (pages with no table keep the raw code point) and mixed into the running
  hash state, low byte first and high byte second.

  n1  in/out: running hash value
  n2  in/out: running increment, advanced by 3 per hashed byte

  Hashing stops at the first character that cannot be decoded.
*/
void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
{
  const uchar *end= s + slen;

  while (s < end)
  {
    my_wc_t weight;
    int     page;
    int     nread= my_ucs2_uni(cs, &weight, (uchar *) s, (uchar *) end);

    if (nread <= 0)
      break;

    page= (weight >> 8) & 0xFF;
    if (uni_plane[page])
      weight= uni_plane[page][weight & 0xFF].sort;

    /* low byte */
    *n1^= (((*n1 & 63) + *n2) * (weight & 0xFF)) + (*n1 << 8);
    *n2+= 3;
    /* high byte */
    *n1^= (((*n1 & 63) + *n2) * (weight >> 8)) + (*n1 << 8);
    *n2+= 3;

    s+= nread;
  }
}
|
||||
|
||||
|
||||
/*
  Upper-case a NUL-terminated string: deliberately a no-op for ucs2.
  Both arguments are ignored and the string is left unchanged.
  NOTE(review): presumably because a NUL-terminated scan cannot delimit
  UCS-2 text -- confirm with callers.
*/
void my_caseup_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                        char *s __attribute__((unused)))
{
  /* nothing to do */
}
|
||||
|
||||
|
||||
|
||||
/*
  Lower-case a length-delimited UCS-2 buffer in place.

  Each character is decoded, mapped through the .tolower member of its
  uni_plane[] page (pages with no table are left unchanged) and written back
  over the original bytes. Conversion stops silently at the first character
  that cannot be decoded or re-encoded, e.g. a trailing odd byte.
*/
void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
{
  char *end= s + slen;

  while (s < end)
  {
    my_wc_t ch;
    int     page;
    int     nread= my_ucs2_uni(cs, &ch, (uchar *) s, (uchar *) end);

    if (nread <= 0)
      break;

    page= (ch >> 8) & 0xFF;
    if (uni_plane[page])
      ch= uni_plane[page][ch & 0xFF].tolower;

    if (my_uni_ucs2(cs, ch, (uchar *) s, (uchar *) end) != nread)
      break;
    s+= nread;
  }
}
|
||||
|
||||
/*
  Lower-case a NUL-terminated string: deliberately a no-op for ucs2.
  Both arguments are ignored and the string is left unchanged.
  NOTE(review): presumably because a NUL-terminated scan cannot delimit
  UCS-2 text -- confirm with callers.
*/
void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                        char *s __attribute__((unused)))
{
  /* nothing to do */
}
|
||||
|
||||
|
||||
/*
  Compare two length-delimited UCS-2 strings by collation weight.

  Characters are decoded pairwise and replaced by the .sort weight of their
  uni_plane[] page (pages with no table keep the raw code point). The first
  differing pair decides the result. If either side cannot be decoded, the
  bytes at the current positions are compared instead.

  Returns <0, 0 or >0. When one string is exhausted first, the result is the
  difference between the numbers of bytes left unread in s and in t.
*/
int my_strnncoll_ucs2(CHARSET_INFO *cs,
                      const uchar *s, uint slen, const uchar *t, uint tlen)
{
  const uchar *s_end= s + slen;
  const uchar *t_end= t + tlen;

  while (s < s_end && t < t_end)
  {
    my_wc_t a, b;
    int     page;
    int     a_len= my_ucs2_uni(cs, &a, s, s_end);
    int     b_len= my_ucs2_uni(cs, &b, t, t_end);

    /* Malformed input on either side: fall back to a raw byte compare */
    if (a_len <= 0 || b_len <= 0)
      return (int) s[0] - (int) t[0];

    page= (a >> 8) & 0xFF;
    if (uni_plane[page])
      a= uni_plane[page][a & 0xFF].sort;

    page= (b >> 8) & 0xFF;
    if (uni_plane[page])
      b= uni_plane[page][b & 0xFF].sort;

    if (a != b)
      return (int) a - (int) b;

    s+= a_len;
    t+= b_len;
  }

  return (int) ((s_end - s) - (t_end - t));
}
|
||||
|
||||
/*
  Case-insensitive comparison of the first len bytes of two UCS-2 strings.

  Characters are decoded pairwise and lower-cased through the .tolower
  member of their uni_plane[] page (pages with no table keep the raw code
  point). The first differing pair decides the result. If either side cannot
  be decoded (e.g. a trailing odd byte), the bytes at the current positions
  are compared instead.

  Both s and t must be readable for len bytes.
  Returns <0, 0 or >0.
*/
int my_strncasecmp_ucs2(CHARSET_INFO *cs,
                        const char *s, const char *t, uint len)
{
  const char *s_end= s + len;
  const char *t_end= t + len;

  while (s < s_end && t < t_end)
  {
    my_wc_t a, b;
    int     page;
    int     a_len= my_ucs2_uni(cs, &a, (const uchar *) s, (const uchar *) s_end);
    int     b_len= my_ucs2_uni(cs, &b, (const uchar *) t, (const uchar *) t_end);

    /* Malformed input on either side: fall back to a raw byte compare */
    if (a_len <= 0 || b_len <= 0)
      return (int) s[0] - (int) t[0];

    page= (a >> 8) & 0xFF;
    if (uni_plane[page])
      a= uni_plane[page][a & 0xFF].tolower;

    page= (b >> 8) & 0xFF;
    if (uni_plane[page])
      b= uni_plane[page][b & 0xFF].tolower;

    if (a != b)
      return (int) a - (int) b;

    s+= a_len;
    t+= b_len;
  }

  return (int) ((s_end - s) - (t_end - t));
}
|
||||
|
||||
/*
  Case-insensitive comparison of two NUL-terminated strings.

  The strings are compared with my_strncasecmp_ucs2() over the length they
  have in common. If that part is equal, the shorter string sorts first.
  Comparing over the LONGER of the two lengths, as was done before, lets the
  decoder read the shorter string beyond its terminating NUL.

  Returns <0, 0 or >0.

  NOTE(review): the lengths come from strlen(), which stops at the first
  zero byte. my_uni_ucs2() stores the high byte first, so any code point
  below 0x100 starts with a zero byte and ends the string early here.
  Confirm that callers only use this entry point where that is acceptable.
*/
int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
  uint s_len= strlen(s);
  uint t_len= strlen(t);
  uint common= (s_len < t_len) ? s_len : t_len;
  int  diff= my_strncasecmp_ucs2(cs, s, t, common);

  if (diff != 0)
    return diff;

  /* Common part is equal: order by length without reading any further */
  if (s_len == t_len)
    return 0;
  return (s_len < t_len) ? -1 : 1;
}
|
||||
|
||||
int my_strnxfrm_ucs2(CHARSET_INFO *cs,
|
||||
uchar *dst, uint dstlen, const uchar *src, uint srclen)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
int plane;
|
||||
uchar *de = dst + dstlen;
|
||||
const uchar *se = src + srclen;
|
||||
const uchar *dst_orig = dst;
|
||||
|
||||
while( src < se && dst < de )
|
||||
{
|
||||
if ((res=my_ucs2_uni(cs,&wc, src, se))<0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
src+=res;
|
||||
srclen-=res;
|
||||
|
||||
plane=(wc>>8) & 0xFF;
|
||||
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
|
||||
|
||||
if ((res=my_uni_ucs2(cs,wc,dst,de)) <0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
dst+=res;
|
||||
}
|
||||
return dst - dst_orig;
|
||||
}
|
||||
|
||||
/*
  Multi-byte character probe for ucs2.
  Always returns 2; none of the arguments is examined, so the range b..e is
  not checked for actually holding two bytes.
*/
int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                     const char *b __attribute__((unused)),
                     const char *e __attribute__((unused)))
{
  return 2;
}
|
||||
|
||||
my_bool my_ismbhead_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
uint ch __attribute__((unused)))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
uint c __attribute__((unused)))
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -3635,6 +3635,39 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
},
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CHARSET_ucs2
|
||||
{
|
||||
35, /* number */
|
||||
"ucs2", /* name */
|
||||
ctype_ucs2, /* ctype */
|
||||
to_lower_ucs2, /* to_lower */
|
||||
to_upper_ucs2, /* to_upper */
|
||||
to_upper_ucs2, /* sort_order */
|
||||
NULL, /* tab_to_uni */
|
||||
NULL, /* tab_from_uni */
|
||||
1, /* strxfrm_multiply */
|
||||
my_strnncoll_ucs2, /* strnncoll */
|
||||
my_strnxfrm_ucs2, /* strnxfrm */
|
||||
NULL, /* like_range */
|
||||
2, /* mbmaxlen */
|
||||
my_ismbchar_ucs2, /* ismbchar */
|
||||
my_ismbhead_ucs2, /* ismbhead */
|
||||
my_mbcharlen_ucs2, /* mbcharlen */
|
||||
my_ucs2_uni, /* mb_wc */
|
||||
my_uni_ucs2, /* wc_mb */
|
||||
my_caseup_str_ucs2,
|
||||
my_casedn_str_ucs2,
|
||||
my_caseup_ucs2,
|
||||
my_casedn_ucs2,
|
||||
my_strcasecmp_ucs2,
|
||||
my_strncasecmp_ucs2,
|
||||
my_hash_caseup_ucs2,/* hash_caseup */
|
||||
my_hash_sort_ucs2, /* hash_sort */
|
||||
0
|
||||
},
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HAVE_CHARSET_ujis
|
||||
{
|
||||
12, /* number */
|
||||
|
|
Loading…
Reference in a new issue